diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2121,7 +2121,7 @@ Address ZeroAddrBound = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".bound.zero.addr"); - CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); + CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound); llvm::SmallVector OutlinedFnArgs; // ThreadId for serialized parallels is 0. OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); @@ -4780,8 +4780,8 @@ LValue NumLVal = CGF.MakeAddrLValue( CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), C.getUIntPtrType()); - CGF.InitTempAlloca(NumLVal.getAddress(CGF), - llvm::ConstantInt::get(CGF.IntPtrTy, 0)); + CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0), + NumLVal.getAddress(CGF)); llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); CGF.EmitStoreOfScalar(Add, NumLVal); diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -1500,7 +1500,7 @@ Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr"); - CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); + CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr); llvm::SmallVector OutlinedFnArgs; OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer()); OutlinedFnArgs.push_back(ZeroAddr.getPointer()); @@ -3488,7 +3488,7 @@ Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, /*Name=*/".zero.addr"); - CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0)); + CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddr); // Get the array of arguments. SmallVector Args; diff --git a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp @@ -301,7 +301,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -337,6 +336,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -637,7 +637,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -673,6 +672,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -784,7 +784,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -829,6 +828,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1129,7 +1129,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1165,6 +1164,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1276,7 +1276,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -1321,6 +1320,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1598,7 +1598,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1634,6 +1633,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1934,7 +1934,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1970,6 +1969,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2081,7 +2081,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -2126,6 +2125,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2426,7 +2426,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2462,6 +2461,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2573,7 +2573,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -2618,6 +2617,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2895,7 +2895,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2931,6 +2930,7 @@ // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3231,7 +3231,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3267,6 +3266,7 @@ // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3378,7 +3378,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -3423,6 +3422,7 @@ // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK5-NEXT: br label [[OMP_IF_END]] @@ -3723,7 +3723,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3759,6 +3758,7 @@ // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3870,7 +3870,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -3915,6 +3914,7 @@ // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK5-NEXT: br label [[OMP_IF_END]] @@ -4192,7 +4192,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4228,6 +4227,7 @@ // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4528,7 +4528,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4564,6 +4563,7 @@ // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4675,7 +4675,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -4720,6 +4719,7 @@ // CHECK6: omp_if.else: // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK6-NEXT: br label [[OMP_IF_END]] @@ -5020,7 +5020,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5056,6 +5055,7 @@ // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5167,7 +5167,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -5212,6 +5211,7 @@ // CHECK6: omp_if.else: // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK6-NEXT: br label [[OMP_IF_END]] @@ -5489,7 +5489,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5525,6 +5524,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5825,7 +5825,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5861,6 +5860,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5972,7 +5972,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -6017,6 +6016,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -6317,7 +6317,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -6353,6 +6352,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6464,7 +6464,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -6509,6 +6508,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -6786,7 +6786,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -6822,6 +6821,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7122,7 +7122,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7158,6 +7157,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7269,7 +7269,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -7314,6 +7313,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -7614,7 +7614,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7650,6 +7649,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7761,7 +7761,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -7806,6 +7805,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -8083,7 +8083,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8119,6 +8118,7 @@ // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8419,7 +8419,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8455,6 +8454,7 @@ // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8566,7 +8566,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -8611,6 +8610,7 @@ // CHECK13: omp_if.else: // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK13-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK13-NEXT: br label [[OMP_IF_END]] @@ -8911,7 +8911,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8947,6 +8946,7 @@ // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9058,7 +9058,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -9103,6 +9102,7 @@ // CHECK13: omp_if.else: // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK13-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK13-NEXT: br label [[OMP_IF_END]] @@ -9380,7 +9380,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9416,6 +9415,7 @@ // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9716,7 +9716,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9752,6 +9751,7 @@ // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9863,7 +9863,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -9908,6 +9907,7 @@ // CHECK14: omp_if.else: // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK14-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK14-NEXT: br label [[OMP_IF_END]] @@ -10208,7 +10208,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10244,6 +10243,7 @@ // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10355,7 +10355,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -10400,6 +10399,7 @@ // CHECK14: omp_if.else: // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK14-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK14-NEXT: br label [[OMP_IF_END]] @@ -10495,4 +10495,3 @@ // CHECK14-NEXT: call void @__tgt_register_requires(i64 1) // CHECK14-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp @@ -200,7 +200,7 @@ // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -277,7 +277,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -312,7 +312,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -348,6 +347,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -356,7 +356,7 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -434,7 +434,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -563,7 +563,7 @@ // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -641,7 +641,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -676,7 +676,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -712,6 +711,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -720,7 +720,7 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -798,7 +798,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -837,7 +837,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -882,6 +881,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -892,7 +892,7 @@ // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -970,7 +970,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1097,7 +1097,7 @@ // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1175,7 +1175,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1210,7 +1210,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1246,6 +1245,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1254,7 +1254,7 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1332,7 +1332,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1371,7 +1371,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -1416,6 +1415,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1426,7 +1426,7 @@ // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1504,7 +1504,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1609,7 +1609,7 @@ // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1686,7 +1686,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1721,7 +1721,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1757,6 +1756,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1765,7 +1765,7 @@ // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1843,7 +1843,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1972,7 +1972,7 @@ // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2050,7 +2050,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2085,7 +2085,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2121,6 +2120,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2129,7 +2129,7 @@ // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2207,7 +2207,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2246,7 +2246,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -2291,6 +2290,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2301,7 +2301,7 @@ // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2379,7 +2379,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2506,7 +2506,7 @@ // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2584,7 +2584,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2619,7 +2619,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2655,6 +2654,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2663,7 +2663,7 @@ // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2741,7 +2741,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2780,7 +2780,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -2825,6 +2824,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2835,7 +2835,7 @@ // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2913,7 +2913,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -3018,7 +3018,7 @@ // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3095,7 +3095,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3130,7 +3130,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3166,6 +3165,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3174,7 +3174,7 @@ // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3252,7 +3252,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3381,7 +3381,7 @@ // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3459,7 +3459,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3494,7 +3494,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3530,6 +3529,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3538,7 +3538,7 @@ // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3616,7 +3616,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3659,8 +3659,6 @@ // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR18:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -3719,6 +3717,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -3729,7 +3728,7 @@ // CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END23:%.*]] // CHECK3: omp_if.else7: @@ -3759,6 +3758,7 @@ // CHECK3: omp_if.else17: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK3-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4 // CHECK3-NEXT: call void @.omp_outlined..10(i32* [[TMP29]], i32* [[DOTBOUND_ZERO_ADDR18]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK3-NEXT: br label [[OMP_IF_END19]] @@ -3769,7 +3769,7 @@ // CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK3-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK3: omp.inner.for.end22: // CHECK3-NEXT: br label [[OMP_IF_END23]] // CHECK3: omp_if.end23: @@ -3856,7 +3856,7 @@ // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: @@ -3895,7 +3895,7 @@ // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -3984,7 +3984,7 @@ // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: @@ -4023,7 +4023,7 @@ // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -4154,7 +4154,7 @@ // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4232,7 +4232,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4267,7 +4267,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4303,6 +4302,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4311,7 +4311,7 @@ // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4389,7 +4389,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4428,7 +4428,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -4473,6 +4472,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -4483,7 +4483,7 @@ // CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4561,7 +4561,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4666,7 +4666,7 @@ // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4743,7 +4743,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4778,7 +4778,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4814,6 +4813,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4822,7 +4822,7 @@ // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4900,7 +4900,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5029,7 +5029,7 @@ // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5107,7 +5107,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5142,7 +5142,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5178,6 +5177,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5186,7 +5186,7 @@ // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5264,7 +5264,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5307,8 +5307,6 @@ // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR18:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -5367,6 +5365,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK4-NEXT: br label [[OMP_IF_END]] @@ -5377,7 +5376,7 @@ // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END23:%.*]] // CHECK4: omp_if.else7: @@ -5407,6 +5406,7 @@ // CHECK4: omp_if.else17: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK4-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4 // CHECK4-NEXT: call void @.omp_outlined..10(i32* [[TMP29]], i32* [[DOTBOUND_ZERO_ADDR18]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK4-NEXT: br label [[OMP_IF_END19]] @@ -5417,7 +5417,7 @@ // CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK4-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK4: omp.inner.for.end22: // CHECK4-NEXT: br label [[OMP_IF_END23]] // CHECK4: omp_if.end23: @@ -5504,7 +5504,7 @@ // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: @@ -5543,7 +5543,7 @@ // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK4: omp.inner.for.end18: // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: @@ -5632,7 +5632,7 @@ // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: @@ -5671,7 +5671,7 @@ // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK4: omp.inner.for.end18: // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: @@ -5802,7 +5802,7 @@ // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5880,7 +5880,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5915,7 +5915,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5951,6 +5950,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5959,7 +5959,7 @@ // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6037,7 +6037,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6076,7 +6076,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -6121,6 +6120,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK4-NEXT: br label [[OMP_IF_END]] @@ -6131,7 +6131,7 @@ // CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6209,7 +6209,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6250,22 +6250,22 @@ // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6275,23 +6275,23 @@ // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK5-NEXT: call void @_Z9gtid_testv() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6324,23 +6324,23 @@ // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: call void @_Z3fn4v() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6350,23 +6350,23 @@ // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12 -// CHECK5-NEXT: call void @_Z3fn5v(), !llvm.access.group !12 +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK5-NEXT: call void @_Z3fn5v() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6376,23 +6376,23 @@ // CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK5: omp.inner.for.cond21: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK5: omp.inner.for.body23: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15 -// CHECK5-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 +// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK5-NEXT: call void @_Z3fn6v() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK5: omp.body.continue26: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK5: omp.inner.for.end29: // CHECK5-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6427,23 +6427,23 @@ // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: call void @_Z3fn1v() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6453,23 +6453,23 @@ // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: call void @_Z3fn2v(), !llvm.access.group !21 +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK5-NEXT: call void @_Z3fn2v() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6479,23 +6479,23 @@ // CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK5: omp.inner.for.cond21: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK5: omp.inner.for.body23: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: call void @_Z3fn3v(), !llvm.access.group !24 +// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK5-NEXT: call void @_Z3fn3v() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK5: omp.body.continue26: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end29: // CHECK5-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6521,22 +6521,22 @@ // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6546,23 +6546,23 @@ // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 -// CHECK6-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK6-NEXT: call void @_Z9gtid_testv() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6595,23 +6595,23 @@ // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK6-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: call void @_Z3fn4v() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6621,23 +6621,23 @@ // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12 -// CHECK6-NEXT: call void @_Z3fn5v(), !llvm.access.group !12 +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK6-NEXT: call void @_Z3fn5v() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6647,23 +6647,23 @@ // CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK6: omp.inner.for.cond21: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK6: omp.inner.for.body23: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15 -// CHECK6-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 +// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK6-NEXT: call void @_Z3fn6v() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK6: omp.body.continue26: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK6: omp.inner.for.inc27: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK6: omp.inner.for.end29: // CHECK6-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6698,23 +6698,23 @@ // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK6-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: call void @_Z3fn1v() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6724,23 +6724,23 @@ // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21 -// CHECK6-NEXT: call void @_Z3fn2v(), !llvm.access.group !21 +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK6-NEXT: call void @_Z3fn2v() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6750,23 +6750,23 @@ // CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK6: omp.inner.for.cond21: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK6: omp.inner.for.body23: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24 -// CHECK6-NEXT: call void @_Z3fn3v(), !llvm.access.group !24 +// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK6-NEXT: call void @_Z3fn3v() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK6: omp.body.continue26: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK6: omp.inner.for.inc27: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK6: omp.inner.for.end29: // CHECK6-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6792,22 +6792,22 @@ // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -6817,23 +6817,23 @@ // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK7: omp.inner.for.cond7: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK7: omp.inner.for.body9: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK7-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 +// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK7-NEXT: call void @_Z9gtid_testv() // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK7: omp.body.continue12: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK7: omp.inner.for.inc13: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK7-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK7: omp.inner.for.end15: // CHECK7-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6867,23 +6867,23 @@ // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: call void @_Z3fn4v() // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -6927,23 +6927,23 @@ // CHECK7: omp_if.then: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK7: omp.inner.for.cond22: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK7-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK7-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK7: omp.inner.for.body24: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK7-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !14 -// CHECK7-NEXT: call void @_Z3fn6v(), !llvm.access.group !14 +// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 +// CHECK7-NEXT: call void @_Z3fn6v() // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK7: omp.body.continue27: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK7: omp.inner.for.inc28: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK7: omp.inner.for.end30: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] @@ -7003,23 +7003,23 @@ // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: call void @_Z3fn1v() // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -7055,23 +7055,23 @@ // CHECK7-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK7: omp.inner.for.cond21: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK7-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK7: omp.inner.for.body23: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK7-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !22 -// CHECK7-NEXT: call void @_Z3fn3v(), !llvm.access.group !22 +// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK7-NEXT: call void @_Z3fn3v() // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK7: omp.body.continue26: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK7: omp.inner.for.inc27: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK7: omp.inner.for.end29: // CHECK7-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7097,22 +7097,22 @@ // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7122,23 +7122,23 @@ // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK8: omp.inner.for.cond7: -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK8: omp.inner.for.body9: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK8-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 -// CHECK8-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 +// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK8-NEXT: call void @_Z9gtid_testv() // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK8: omp.body.continue12: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK8: omp.inner.for.inc13: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK8-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK8: omp.inner.for.end15: // CHECK8-NEXT: store i32 100, i32* [[I6]], align 4 @@ -7172,23 +7172,23 @@ // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK8-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: call void @_Z3fn4v() // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7232,23 +7232,23 @@ // CHECK8: omp_if.then: // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK8: omp.inner.for.cond22: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK8-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK8-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK8: omp.inner.for.body24: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK8-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !14 -// CHECK8-NEXT: call void @_Z3fn6v(), !llvm.access.group !14 +// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 +// CHECK8-NEXT: call void @_Z3fn6v() // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK8: omp.body.continue27: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK8: omp.inner.for.inc28: -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK8: omp.inner.for.end30: // CHECK8-NEXT: br label [[OMP_IF_END:%.*]] @@ -7308,23 +7308,23 @@ // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK8-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: call void @_Z3fn1v() // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7360,23 +7360,23 @@ // CHECK8-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK8: omp.inner.for.cond21: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK8-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK8-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK8: omp.inner.for.body23: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK8-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !22 -// CHECK8-NEXT: call void @_Z3fn3v(), !llvm.access.group !22 +// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK8-NEXT: call void @_Z3fn3v() // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK8: omp.body.continue26: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK8: omp.inner.for.inc27: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK8: omp.inner.for.end29: // CHECK8-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7466,7 +7466,7 @@ // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7543,7 +7543,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7578,7 +7578,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7614,6 +7613,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7622,7 +7622,7 @@ // CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7700,7 +7700,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7829,7 +7829,7 @@ // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7907,7 +7907,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7942,7 +7942,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7978,6 +7977,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7986,7 +7986,7 @@ // CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8064,7 +8064,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8103,7 +8103,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -8148,6 +8147,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -8158,7 +8158,7 @@ // CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8236,7 +8236,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8363,7 +8363,7 @@ // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8441,7 +8441,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8476,7 +8476,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8512,6 +8511,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8520,7 +8520,7 @@ // CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8598,7 +8598,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8637,7 +8637,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -8682,6 +8681,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK9-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -8692,7 +8692,7 @@ // CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8770,7 +8770,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8875,7 +8875,7 @@ // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -8952,7 +8952,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -8987,7 +8987,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9023,6 +9022,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9031,7 +9031,7 @@ // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9109,7 +9109,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9238,7 +9238,7 @@ // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9316,7 +9316,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9351,7 +9351,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9387,6 +9386,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9395,7 +9395,7 @@ // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9473,7 +9473,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9512,7 +9512,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -9557,6 +9556,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -9567,7 +9567,7 @@ // CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9645,7 +9645,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9772,7 +9772,7 @@ // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9850,7 +9850,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9885,7 +9885,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9921,6 +9920,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9929,7 +9929,7 @@ // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10007,7 +10007,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10046,7 +10046,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -10091,6 +10090,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK10-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -10101,7 +10101,7 @@ // CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10179,7 +10179,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10284,7 +10284,7 @@ // CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10361,7 +10361,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10396,7 +10396,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10432,6 +10431,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10440,7 +10440,7 @@ // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10518,7 +10518,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10647,7 +10647,7 @@ // CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10725,7 +10725,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10760,7 +10760,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10796,6 +10795,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10804,7 +10804,7 @@ // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10882,7 +10882,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10925,8 +10925,6 @@ // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR18:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -10985,6 +10983,7 @@ // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK11-NEXT: br label [[OMP_IF_END]] @@ -10995,7 +10994,7 @@ // CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END23:%.*]] // CHECK11: omp_if.else7: @@ -11025,6 +11024,7 @@ // CHECK11: omp_if.else17: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4 // CHECK11-NEXT: call void @.omp_outlined..10(i32* [[TMP29]], i32* [[DOTBOUND_ZERO_ADDR18]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK11-NEXT: br label [[OMP_IF_END19]] @@ -11035,7 +11035,7 @@ // CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK11-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK11: omp.inner.for.end22: // CHECK11-NEXT: br label [[OMP_IF_END23]] // CHECK11: omp_if.end23: @@ -11122,7 +11122,7 @@ // CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: @@ -11161,7 +11161,7 @@ // CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -11250,7 +11250,7 @@ // CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: @@ -11289,7 +11289,7 @@ // CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -11420,7 +11420,7 @@ // CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11498,7 +11498,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11533,7 +11533,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -11569,6 +11568,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11577,7 +11577,7 @@ // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11655,7 +11655,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11694,7 +11694,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -11739,6 +11738,7 @@ // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[OMP_IF_END]] @@ -11749,7 +11749,7 @@ // CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11827,7 +11827,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11932,7 +11932,7 @@ // CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12009,7 +12009,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12044,7 +12044,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -12080,6 +12079,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12088,7 +12088,7 @@ // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12166,7 +12166,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12295,7 +12295,7 @@ // CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12373,7 +12373,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12408,7 +12408,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -12444,6 +12443,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12452,7 +12452,7 @@ // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12530,7 +12530,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12573,8 +12573,6 @@ // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR__CASTED12:%.*]] = alloca i64, align 8 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR18:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -12633,6 +12631,7 @@ // CHECK12: omp_if.else: // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK12-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP17]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK12-NEXT: br label [[OMP_IF_END]] @@ -12643,7 +12642,7 @@ // CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END23:%.*]] // CHECK12: omp_if.else7: @@ -12673,6 +12672,7 @@ // CHECK12: omp_if.else17: // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK12-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR18]], align 4 // CHECK12-NEXT: call void @.omp_outlined..10(i32* [[TMP29]], i32* [[DOTBOUND_ZERO_ADDR18]], i64 [[TMP23]], i64 [[TMP25]], i64 [[TMP27]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP3]]) // CHECK12-NEXT: br label [[OMP_IF_END19]] @@ -12683,7 +12683,7 @@ // CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP30]], [[TMP31]] // CHECK12-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK12: omp.inner.for.end22: // CHECK12-NEXT: br label [[OMP_IF_END23]] // CHECK12: omp_if.end23: @@ -12770,7 +12770,7 @@ // CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: @@ -12809,7 +12809,7 @@ // CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK12-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK12: omp.inner.for.end18: // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: @@ -12898,7 +12898,7 @@ // CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: @@ -12937,7 +12937,7 @@ // CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK12-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK12: omp.inner.for.end18: // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: @@ -13068,7 +13068,7 @@ // CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13146,7 +13146,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13181,7 +13181,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -13217,6 +13216,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -13225,7 +13225,7 @@ // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13303,7 +13303,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13342,7 +13342,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[ARG]], i32** [[ARG_ADDR]], align 8 @@ -13387,6 +13386,7 @@ // CHECK12: omp_if.else: // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK12-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP13]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]]) // CHECK12-NEXT: br label [[OMP_IF_END]] @@ -13397,7 +13397,7 @@ // CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13475,7 +13475,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13516,22 +13516,22 @@ // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -13541,23 +13541,23 @@ // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK13-NEXT: call void @_Z9gtid_testv() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13590,23 +13590,23 @@ // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK13-NEXT: call void @_Z3fn4v() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -13616,23 +13616,23 @@ // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16 -// CHECK13-NEXT: call void @_Z3fn5v(), !llvm.access.group !16 +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK13-NEXT: call void @_Z3fn5v() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13642,23 +13642,23 @@ // CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK13: omp.inner.for.cond21: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK13: omp.inner.for.body23: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19 -// CHECK13-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 +// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK13-NEXT: call void @_Z3fn6v() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK13: omp.body.continue26: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK13: omp.inner.for.inc27: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK13: omp.inner.for.end29: // CHECK13-NEXT: store i32 100, i32* [[I20]], align 4 @@ -13693,23 +13693,23 @@ // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK13-NEXT: call void @_Z3fn1v() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -13719,23 +13719,23 @@ // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: call void @_Z3fn2v(), !llvm.access.group !25 +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK13-NEXT: call void @_Z3fn2v() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13745,23 +13745,23 @@ // CHECK13-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK13: omp.inner.for.cond21: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK13: omp.inner.for.body23: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: call void @_Z3fn3v(), !llvm.access.group !28 +// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK13-NEXT: call void @_Z3fn3v() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK13: omp.body.continue26: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK13: omp.inner.for.inc27: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK13: omp.inner.for.end29: // CHECK13-NEXT: store i32 100, i32* [[I20]], align 4 @@ -13787,22 +13787,22 @@ // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -13812,23 +13812,23 @@ // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 -// CHECK14-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK14-NEXT: call void @_Z9gtid_testv() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13861,23 +13861,23 @@ // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK14-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK14-NEXT: call void @_Z3fn4v() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -13887,23 +13887,23 @@ // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16 -// CHECK14-NEXT: call void @_Z3fn5v(), !llvm.access.group !16 +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK14-NEXT: call void @_Z3fn5v() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13913,23 +13913,23 @@ // CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK14: omp.inner.for.cond21: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK14: omp.inner.for.body23: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19 -// CHECK14-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 +// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK14-NEXT: call void @_Z3fn6v() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK14: omp.body.continue26: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK14: omp.inner.for.inc27: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK14: omp.inner.for.end29: // CHECK14-NEXT: store i32 100, i32* [[I20]], align 4 @@ -13964,23 +13964,23 @@ // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK14-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK14-NEXT: call void @_Z3fn1v() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -13990,23 +13990,23 @@ // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25 -// CHECK14-NEXT: call void @_Z3fn2v(), !llvm.access.group !25 +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK14-NEXT: call void @_Z3fn2v() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14016,23 +14016,23 @@ // CHECK14-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK14: omp.inner.for.cond21: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK14: omp.inner.for.body23: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: call void @_Z3fn3v(), !llvm.access.group !28 +// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK14-NEXT: call void @_Z3fn3v() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK14: omp.body.continue26: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK14: omp.inner.for.inc27: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK14: omp.inner.for.end29: // CHECK14-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14058,22 +14058,22 @@ // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14083,23 +14083,23 @@ // CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK15: omp.inner.for.cond7: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK15: omp.inner.for.body9: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK15-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK15-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 -// CHECK15-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 +// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK15-NEXT: call void @_Z9gtid_testv() // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK15: omp.body.continue12: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK15: omp.inner.for.inc13: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK15-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK15: omp.inner.for.end15: // CHECK15-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14133,23 +14133,23 @@ // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK15-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK15-NEXT: call void @_Z3fn4v() // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14193,23 +14193,23 @@ // CHECK15: omp_if.then: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK15: omp.inner.for.cond22: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK15-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK15: omp.inner.for.body24: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK15-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !18 -// CHECK15-NEXT: call void @_Z3fn6v(), !llvm.access.group !18 +// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 +// CHECK15-NEXT: call void @_Z3fn6v() // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK15: omp.body.continue27: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK15: omp.inner.for.inc28: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK15: omp.inner.for.end30: // CHECK15-NEXT: br label [[OMP_IF_END:%.*]] @@ -14269,23 +14269,23 @@ // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK15-NEXT: call void @_Z3fn1v() // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14321,23 +14321,23 @@ // CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK15: omp.inner.for.cond21: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK15-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK15-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK15: omp.inner.for.body23: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK15-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !26 -// CHECK15-NEXT: call void @_Z3fn3v(), !llvm.access.group !26 +// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK15-NEXT: call void @_Z3fn3v() // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK15: omp.body.continue26: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK15: omp.inner.for.inc27: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK15: omp.inner.for.end29: // CHECK15-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14363,22 +14363,22 @@ // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14388,23 +14388,23 @@ // CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK16: omp.inner.for.cond7: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK16-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK16: omp.inner.for.body9: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK16-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK16-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 -// CHECK16-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 +// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK16-NEXT: call void @_Z9gtid_testv() // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK16: omp.body.continue12: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK16: omp.inner.for.inc13: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK16-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK16: omp.inner.for.end15: // CHECK16-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14438,23 +14438,23 @@ // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK16-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK16-NEXT: call void @_Z3fn4v() // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14498,23 +14498,23 @@ // CHECK16: omp_if.then: // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK16: omp.inner.for.cond22: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK16-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK16-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK16: omp.inner.for.body24: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK16-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !18 -// CHECK16-NEXT: call void @_Z3fn6v(), !llvm.access.group !18 +// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 +// CHECK16-NEXT: call void @_Z3fn6v() // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK16: omp.body.continue27: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK16: omp.inner.for.inc28: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK16: omp.inner.for.end30: // CHECK16-NEXT: br label [[OMP_IF_END:%.*]] @@ -14574,23 +14574,23 @@ // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK16-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK16-NEXT: call void @_Z3fn1v() // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14626,23 +14626,23 @@ // CHECK16-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK16: omp.inner.for.cond21: -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK16-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK16-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK16: omp.inner.for.body23: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK16-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !26 -// CHECK16-NEXT: call void @_Z3fn3v(), !llvm.access.group !26 +// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK16-NEXT: call void @_Z3fn3v() // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK16: omp.body.continue26: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK16: omp.inner.for.inc27: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK16: omp.inner.for.end29: // CHECK16-NEXT: store i32 100, i32* [[I20]], align 4 diff --git a/clang/test/OpenMP/nvptx_allocate_codegen.cpp b/clang/test/OpenMP/nvptx_allocate_codegen.cpp --- a/clang/test/OpenMP/nvptx_allocate_codegen.cpp +++ b/clang/test/OpenMP/nvptx_allocate_codegen.cpp @@ -91,7 +91,7 @@ // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 2, i32* @_ZZ4mainE1a, align 4 // CHECK1-NEXT: store double 3.000000e+00, double* [[B]], align 8 -// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooIiET_v() #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooIiET_v() #[[ATTR7:[0-9]+]] // CHECK1-NEXT: ret i32 [[CALL]] // // @@ -127,7 +127,7 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load float, float* [[BAR_A]], align 4 // CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP0]] to double // CHECK1-NEXT: store double [[CONV]], double* addrspacecast (double addrspace(3)* @bar_b to double*), align 8 -// CHECK1-NEXT: call void @_Z3bazRf(float* nonnull align 4 dereferenceable(4) [[BAR_A]]) #[[ATTR6]] +// CHECK1-NEXT: call void @_Z3bazRf(float* nonnull align 4 dereferenceable(4) [[BAR_A]]) #[[ATTR7]] // CHECK1-NEXT: ret void // // @@ -138,9 +138,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp --- a/clang/test/OpenMP/nvptx_data_sharing.cpp +++ b/clang/test/OpenMP/nvptx_data_sharing.cpp @@ -447,15 +447,15 @@ // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 // CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** // CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -// CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR1:[0-9]+]] +// CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3:[0-9]+]] // CHECK-NEXT: ret void // // @@ -488,9 +488,9 @@ // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -499,6 +499,6 @@ // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 // CHECK-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** // CHECK-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 -// CHECK-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]]) #[[ATTR1]] +// CHECK-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]]) #[[ATTR3]] // CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -2984,7 +2984,6 @@ // CHECK4-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK4-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 8 // CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -3004,6 +3003,7 @@ // CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* // CHECK4-NEXT: store i32 [[TMP6]], i32* [[CONV1]], align 4 // CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i64 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR4:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -3318,7 +3318,6 @@ // CHECK5-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK5-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 // CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 @@ -3336,6 +3335,7 @@ // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP6]], i32* [[ARGC_CASTED]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR4:[0-9]+]] // CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -3641,7 +3641,6 @@ // CHECK6-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK6-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK6-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 // CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 @@ -3659,6 +3658,7 @@ // CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 // CHECK6-NEXT: store i32 [[TMP6]], i32* [[ARGC_CASTED]], align 4 // CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP7]], [10 x i32]* [[TMP3]]) #[[ATTR4:[0-9]+]] // CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) diff --git a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp @@ -49,7 +49,7 @@ // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @_Z3usev() #[[ATTR6:[0-9]+]] +// CHECK1-NEXT: call void @_Z3usev() #[[ATTR7:[0-9]+]] // CHECK1-NEXT: ret void // // @@ -70,7 +70,7 @@ // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: -// CHECK1-NEXT: call void @_Z3usev() #[[ATTR6]] +// CHECK1-NEXT: call void @_Z3usev() #[[ATTR7]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK1-NEXT: ret void // CHECK1: worker.exit: @@ -84,20 +84,20 @@ // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @_Z4workv() #[[ATTR6]] +// CHECK1-NEXT: call void @_Z4workv() #[[ATTR7]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]] // CHECK1-NEXT: ret void @@ -127,7 +127,7 @@ // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: call void @_Z3usev() #[[ATTR6:[0-9]+]] +// CHECK2-NEXT: call void @_Z3usev() #[[ATTR7:[0-9]+]] // CHECK2-NEXT: ret void // // @@ -148,7 +148,7 @@ // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: -// CHECK2-NEXT: call void @_Z3usev() #[[ATTR6]] +// CHECK2-NEXT: call void @_Z3usev() #[[ATTR7]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK2-NEXT: ret void // CHECK2: worker.exit: @@ -162,20 +162,20 @@ // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK2-NEXT: call void @_Z4workv() #[[ATTR6]] +// CHECK2-NEXT: call void @_Z4workv() #[[ATTR7]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]] // CHECK2-NEXT: ret void @@ -205,7 +205,7 @@ // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: call void @_Z3usev() #[[ATTR6:[0-9]+]] +// CHECK3-NEXT: call void @_Z3usev() #[[ATTR7:[0-9]+]] // CHECK3-NEXT: ret void // // @@ -226,7 +226,7 @@ // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: -// CHECK3-NEXT: call void @_Z3usev() #[[ATTR6]] +// CHECK3-NEXT: call void @_Z3usev() #[[ATTR7]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) // CHECK3-NEXT: ret void // CHECK3: worker.exit: @@ -240,20 +240,20 @@ // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: call void @_Z4workv() #[[ATTR6]] +// CHECK3-NEXT: call void @_Z4workv() #[[ATTR7]] // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: ret void diff --git a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp @@ -45,7 +45,7 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 2) // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* @@ -84,7 +84,7 @@ // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR6]] // CHECK1-NEXT: ret void // // @@ -95,9 +95,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -118,7 +118,7 @@ // CHECK1-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 8 -// CHECK1-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR5]] +// CHECK1-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR6]] // CHECK1-NEXT: ret void // // @@ -132,7 +132,7 @@ // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* -// CHECK1-NEXT: call void @__atomic_load(i64 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR5]] +// CHECK1-NEXT: call void @__atomic_load(i64 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR6]] // CHECK1-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK1: atomic_cont: // CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4 @@ -141,7 +141,7 @@ // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[ATOMIC_TEMP1]] to i8* -// CHECK1-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR5]] +// CHECK1-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR6]] // CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK1: atomic_exit: // CHECK1-NEXT: ret void @@ -154,9 +154,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -178,7 +178,7 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR5:[0-9]+]] +// CHECK2-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR6:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 2) // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* @@ -217,7 +217,7 @@ // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK2-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 -// CHECK2-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR5]] +// CHECK2-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR6]] // CHECK2-NEXT: ret void // // @@ -228,9 +228,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 @@ -251,7 +251,7 @@ // CHECK2-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 4 -// CHECK2-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR5]] +// CHECK2-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR6]] // CHECK2-NEXT: ret void // // @@ -265,7 +265,7 @@ // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK2-NEXT: [[TMP2:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* -// CHECK2-NEXT: call void @__atomic_load(i32 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR5]] +// CHECK2-NEXT: call void @__atomic_load(i32 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR6]] // CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK2: atomic_cont: // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4 @@ -274,7 +274,7 @@ // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK2-NEXT: [[TMP5:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i32* [[ATOMIC_TEMP1]] to i8* -// CHECK2-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR5]] +// CHECK2-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR6]] // CHECK2-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK2: atomic_exit: // CHECK2-NEXT: ret void @@ -287,9 +287,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 @@ -311,7 +311,7 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR5:[0-9]+]] +// CHECK3-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR6:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 2) // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* @@ -350,7 +350,7 @@ // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store i32* [[C]], i32** [[C_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 -// CHECK3-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR5]] +// CHECK3-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR6]] // CHECK3-NEXT: ret void // // @@ -361,9 +361,9 @@ // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 @@ -384,7 +384,7 @@ // CHECK3-NEXT: store i32** [[C]], i32*** [[C_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[C_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[TMP0]], align 4 -// CHECK3-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR5]] +// CHECK3-NEXT: call void @_Z4workPi(i32* [[TMP1]]) #[[ATTR6]] // CHECK3-NEXT: ret void // // @@ -398,7 +398,7 @@ // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[C_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK3-NEXT: [[TMP2:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* -// CHECK3-NEXT: call void @__atomic_load(i32 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR5]] +// CHECK3-NEXT: call void @__atomic_load(i32 4, i8* [[TMP1]], i8* [[TMP2]], i32 0) #[[ATTR6]] // CHECK3-NEXT: br label [[ATOMIC_CONT:%.*]] // CHECK3: atomic_cont: // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[ATOMIC_TEMP]], align 4 @@ -407,7 +407,7 @@ // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK3-NEXT: [[TMP5:%.*]] = bitcast i32* [[ATOMIC_TEMP]] to i8* // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i32* [[ATOMIC_TEMP1]] to i8* -// CHECK3-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR5]] +// CHECK3-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 4, i8* [[TMP4]], i8* [[TMP5]], i8* [[TMP6]], i32 0, i32 0) #[[ATTR6]] // CHECK3-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] // CHECK3: atomic_exit: // CHECK3-NEXT: ret void @@ -420,9 +420,9 @@ // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -1485,9 +1485,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: ret void @@ -1512,9 +1512,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] // CHECK1-NEXT: ret void @@ -1539,9 +1539,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] // CHECK1-NEXT: ret void @@ -1613,9 +1613,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] // CHECK1-NEXT: ret void @@ -1701,9 +1701,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -1760,9 +1760,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR1:[0-9]+]] // CHECK2-NEXT: ret void @@ -1787,9 +1787,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR1]] // CHECK2-NEXT: ret void @@ -1814,9 +1814,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR1]] // CHECK2-NEXT: ret void @@ -1886,9 +1886,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR1]] // CHECK2-NEXT: ret void @@ -1973,9 +1973,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 diff --git a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp --- a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp @@ -590,9 +590,9 @@ // CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -516,9 +516,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -899,9 +899,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 @@ -1282,9 +1282,9 @@ // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -35,12 +35,12 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -579,9 +579,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -602,12 +602,12 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -1160,9 +1160,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -1269,12 +1269,12 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -1813,9 +1813,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -1836,12 +1836,12 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -2394,9 +2394,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -2503,12 +2503,12 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -3047,9 +3047,9 @@ // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -3070,12 +3070,12 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -3628,9 +3628,9 @@ // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -56,7 +56,6 @@ // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i8* // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) @@ -68,6 +67,7 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_CASTED]] to i8* // CHECK1-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR1:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -97,7 +97,6 @@ // CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) @@ -109,6 +108,7 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR1]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -138,7 +138,6 @@ // CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) @@ -150,6 +149,7 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AA_CASTED]] to i16* // CHECK1-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AA_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]]) #[[ATTR1]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) @@ -221,7 +221,6 @@ // CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8* // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) @@ -233,6 +232,7 @@ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[A_CASTED]] to i8* // CHECK2-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -262,7 +262,6 @@ // CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) @@ -274,6 +273,7 @@ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -303,7 +303,6 @@ // CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) @@ -315,6 +314,7 @@ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK2-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) @@ -386,7 +386,6 @@ // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[A_ADDR]] to i8* // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) @@ -398,6 +397,7 @@ // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[A_CASTED]] to i8* // CHECK3-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -427,7 +427,6 @@ // CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) @@ -439,6 +438,7 @@ // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -468,7 +468,6 @@ // CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 true) @@ -480,6 +479,7 @@ // CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[AA_CASTED]] to i16* // CHECK3-NEXT: store i16 [[TMP2]], i16* [[CONV1]], align 2 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AA_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]]) #[[ATTR1]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp @@ -579,12 +579,12 @@ // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -684,9 +684,9 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 @@ -701,12 +701,12 @@ // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -806,9 +806,9 @@ // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 @@ -823,12 +823,12 @@ // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR4:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -928,9 +928,9 @@ // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -18477,7 +18477,6 @@ // CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 @@ -18497,6 +18496,7 @@ // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -18802,7 +18802,6 @@ // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* @@ -18816,6 +18815,7 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -19059,7 +19059,6 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) @@ -19067,6 +19066,7 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -19248,7 +19248,6 @@ // CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 @@ -19262,6 +19261,7 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -19481,7 +19481,6 @@ // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* @@ -19495,6 +19494,7 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -19799,7 +19799,6 @@ // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 @@ -19815,6 +19814,7 @@ // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -20071,7 +20071,6 @@ // CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 @@ -20091,6 +20090,7 @@ // CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -20396,7 +20396,6 @@ // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* @@ -20410,6 +20409,7 @@ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -20653,7 +20653,6 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) @@ -20661,6 +20660,7 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -20842,7 +20842,6 @@ // CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK2-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 @@ -20856,6 +20855,7 @@ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -21075,7 +21075,6 @@ // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* @@ -21089,6 +21088,7 @@ // CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -21388,7 +21388,6 @@ // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK2-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 @@ -21404,6 +21403,7 @@ // CHECK2-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -21660,7 +21660,6 @@ // CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 @@ -21676,6 +21675,7 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -21968,7 +21968,6 @@ // CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 @@ -21980,6 +21979,7 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -22214,7 +22214,6 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) @@ -22222,6 +22221,7 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -22397,7 +22397,6 @@ // CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 @@ -22409,6 +22408,7 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -22618,7 +22618,6 @@ // CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 @@ -22630,6 +22629,7 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -22934,7 +22934,6 @@ // CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK3-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 @@ -22948,6 +22947,7 @@ // CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -23194,7 +23194,6 @@ // CHECK4-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 @@ -23210,6 +23209,7 @@ // CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -23502,7 +23502,6 @@ // CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 @@ -23514,6 +23513,7 @@ // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -23748,7 +23748,6 @@ // CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) @@ -23756,6 +23755,7 @@ // CHECK4-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK4: user_code.entry: // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -23931,7 +23931,6 @@ // CHECK4-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK4-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 @@ -23943,6 +23942,7 @@ // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -24152,7 +24152,6 @@ // CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 @@ -24164,6 +24163,7 @@ // CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -24468,7 +24468,6 @@ // CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK4-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 @@ -24482,6 +24481,7 @@ // CHECK4-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 // CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32* [[TMP5]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_generic_mode_codegen.cpp @@ -39,7 +39,6 @@ // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 // CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -59,6 +58,7 @@ // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], i32* [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -349,7 +349,6 @@ // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -365,6 +364,7 @@ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], i32* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -643,7 +643,6 @@ // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -659,6 +658,7 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], i32* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -937,7 +937,6 @@ // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 // CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -957,6 +956,7 @@ // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* // CHECK4-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], i32* [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -1247,7 +1247,6 @@ // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK5-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -1263,6 +1262,7 @@ // CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], i32* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK5-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -1541,7 +1541,6 @@ // CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK6-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK6-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -1557,6 +1556,7 @@ // CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 // CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], i32* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK6-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -9390,7 +9390,6 @@ // CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 @@ -9410,6 +9409,7 @@ // CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV3]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[L_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i32]* [[TMP0]], i64 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -9739,7 +9739,6 @@ // CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 // CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* @@ -9753,6 +9752,7 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -10020,7 +10020,6 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) @@ -10028,6 +10027,7 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -10223,7 +10223,6 @@ // CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 // CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 @@ -10237,6 +10236,7 @@ // CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* // CHECK1-NEXT: store i32 [[TMP3]], i32* [[CONV1]], align 4 // CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[F_CASTED]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP4]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -10474,7 +10474,6 @@ // CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 @@ -10490,6 +10489,7 @@ // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4 // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -10806,7 +10806,6 @@ // CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 @@ -10818,6 +10817,7 @@ // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -11076,7 +11076,6 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) @@ -11084,6 +11083,7 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -11273,7 +11273,6 @@ // CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK2-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 @@ -11285,6 +11284,7 @@ // CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4 // CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -11512,7 +11512,6 @@ // CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 @@ -11528,6 +11527,7 @@ // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP5]], i32* [[L_CASTED]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i32]* [[TMP0]], i32 [[TMP6]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -11844,7 +11844,6 @@ // CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 @@ -11856,6 +11855,7 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP4]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -12114,7 +12114,6 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 false, i1 false) @@ -12122,6 +12121,7 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB4]]) +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) @@ -12311,7 +12311,6 @@ // CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 @@ -12323,6 +12322,7 @@ // CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[F_CASTED]], align 4 // CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP4]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 false) diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp --- a/clang/test/OpenMP/nvptx_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -896,7 +896,6 @@ // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) @@ -908,6 +907,7 @@ // CHECK1-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* // CHECK1-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i64 4) @@ -937,7 +937,6 @@ // CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -948,6 +947,7 @@ // CHECK1-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8*** // CHECK1-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i64 8) @@ -977,7 +977,6 @@ // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -988,6 +987,7 @@ // CHECK2-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* // CHECK2-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) @@ -1017,7 +1017,6 @@ // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 @@ -1028,6 +1027,7 @@ // CHECK2-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8*** // CHECK2-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) @@ -1059,7 +1059,6 @@ // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK3-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK3-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 @@ -1075,6 +1074,7 @@ // CHECK3-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC3]] to i32* // CHECK3-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC3]], i64 4) @@ -1106,7 +1106,6 @@ // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK3-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK3-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 @@ -1121,6 +1120,7 @@ // CHECK3-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC2]] to i8*** // CHECK3-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 8 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC2]], i64 8) @@ -1152,7 +1152,6 @@ // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -1165,6 +1164,7 @@ // CHECK4-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i32* // CHECK4-NEXT: store i32 [[TMP1]], i32* [[ARGC_ON_STACK]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR2:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) @@ -1196,7 +1196,6 @@ // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 // CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 @@ -1209,6 +1208,7 @@ // CHECK4-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC1]] to i8*** // CHECK4-NEXT: store i8** [[TMP1]], i8*** [[ARGC_ON_STACK]], align 4 // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK4-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC1]], i32 4) diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -4212,7 +4212,6 @@ // CHECK1-NEXT: [[E_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[E]], i64* [[E_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[E_ADDR]] to double* // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) @@ -4224,6 +4223,7 @@ // CHECK1-NEXT: [[E_ON_STACK:%.*]] = bitcast i8* [[E1]] to double* // CHECK1-NEXT: store double [[TMP1]], double* [[E_ON_STACK]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E_ON_STACK]]) #[[ATTR4:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[E1]], i64 8) @@ -4512,7 +4512,6 @@ // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 // CHECK1-NEXT: store i64 [[D]], i64* [[D_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[C_ADDR]] to i8* @@ -4529,6 +4528,7 @@ // CHECK1-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D3]] to float* // CHECK1-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP3]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR4]] // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[D3]], i64 4) @@ -4894,7 +4894,6 @@ // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* @@ -4904,6 +4903,7 @@ // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK1-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[CONV]], i16* [[CONV1]]) #[[ATTR4]] // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) @@ -5534,7 +5534,6 @@ // CHECK2-NEXT: [[E1:%.*]] = alloca double, align 8 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) @@ -5544,6 +5543,7 @@ // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: [[TMP3:%.*]] = load double, double* [[TMP0]], align 8 // CHECK2-NEXT: store double [[TMP3]], double* [[E1]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E1]]) #[[ATTR4:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -5831,7 +5831,6 @@ // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 // CHECK2-NEXT: store i32 [[D]], i32* [[D_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[C_ADDR]] to i8* @@ -5848,6 +5847,7 @@ // CHECK2-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D3]] to float* // CHECK2-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4 // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP3]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR4]] // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[D3]], i32 4) @@ -6213,7 +6213,6 @@ // CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK2-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* @@ -6222,6 +6221,7 @@ // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK2-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR4]] // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) @@ -6852,7 +6852,6 @@ // CHECK3-NEXT: [[E1:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) @@ -6862,6 +6861,7 @@ // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP3:%.*]] = load double, double* [[TMP0]], align 8 // CHECK3-NEXT: store double [[TMP3]], double* [[E1]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E1]]) #[[ATTR4:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -7149,7 +7149,6 @@ // CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 // CHECK3-NEXT: store i32 [[D]], i32* [[D_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[C_ADDR]] to i8* @@ -7166,6 +7165,7 @@ // CHECK3-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D3]] to float* // CHECK3-NEXT: store float [[TMP2]], float* [[D_ON_STACK]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP3]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C2]], float* [[D_ON_STACK]]) #[[ATTR4]] // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[D3]], i32 4) @@ -7531,7 +7531,6 @@ // CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* @@ -7540,6 +7539,7 @@ // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK3: user_code.entry: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 // CHECK3-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR4]] // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) diff --git a/clang/test/OpenMP/parallel_if_codegen.cpp b/clang/test/OpenMP/parallel_if_codegen.cpp --- a/clang/test/OpenMP/parallel_if_codegen.cpp +++ b/clang/test/OpenMP/parallel_if_codegen.cpp @@ -80,13 +80,13 @@ // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..1(i32* [[TMP2]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK1-NEXT: ret void @@ -111,13 +111,12 @@ // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* @Arg, align 4 @@ -129,6 +128,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK1-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -179,13 +179,12 @@ // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4 // CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*)) // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG_ADDR]], align 4 @@ -197,6 +196,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK1-NEXT: call void @.omp_outlined..7(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -250,13 +250,13 @@ // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..1(i32* [[TMP2]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK2-NEXT: ret void @@ -281,13 +281,12 @@ // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* @Arg, align 4 @@ -299,6 +298,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK2-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -349,13 +349,12 @@ // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4 // CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*)) // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG_ADDR]], align 4 @@ -367,6 +366,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK2-NEXT: call void @.omp_outlined..7(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -420,13 +420,13 @@ // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..1(i32* [[TMP2]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK5-NEXT: ret void @@ -451,13 +451,12 @@ // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK5-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* @Arg, align 4 @@ -469,6 +468,7 @@ // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK5-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK5-NEXT: br label [[OMP_IF_END]] @@ -519,13 +519,12 @@ // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK5-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4 // CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*)) // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG_ADDR]], align 4 @@ -537,6 +536,7 @@ // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK5-NEXT: call void @.omp_outlined..7(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK5-NEXT: br label [[OMP_IF_END]] @@ -590,13 +590,13 @@ // CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..1(i32* [[TMP2]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK6-NEXT: ret void @@ -621,13 +621,12 @@ // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK6-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* @Arg, align 4 @@ -639,6 +638,7 @@ // CHECK6: omp_if.else: // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK6-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK6-NEXT: br label [[OMP_IF_END]] @@ -689,13 +689,12 @@ // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK6-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4 // CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*)) // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG_ADDR]], align 4 @@ -707,6 +706,7 @@ // CHECK6: omp_if.else: // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK6-NEXT: call void @.omp_outlined..7(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK6-NEXT: br label [[OMP_IF_END]] @@ -760,13 +760,13 @@ // CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK7-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK7-NEXT: call void @.omp_outlined..1(i32* [[TMP2]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) // CHECK7-NEXT: ret void @@ -791,13 +791,12 @@ // CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK7-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..2 to void (i32*, i32*, ...)*)) // CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK7-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* @Arg, align 4 @@ -809,6 +808,7 @@ // CHECK7: omp_if.else: // CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK7-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK7-NEXT: br label [[OMP_IF_END]] @@ -859,13 +859,12 @@ // CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTTHREADID_TEMP_1:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR2:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 -// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK7-NEXT: store i32 [[ARG]], i32* [[ARG_ADDR]], align 4 // CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined..5 to void (i32*, i32*, ...)*)) // CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK7-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2]] // CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG_ADDR]], align 4 @@ -877,6 +876,7 @@ // CHECK7: omp_if.else: // CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_1]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR2]], align 4 // CHECK7-NEXT: call void @.omp_outlined..7(i32* [[DOTTHREADID_TEMP_1]], i32* [[DOTBOUND_ZERO_ADDR2]]) #[[ATTR2]] // CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK7-NEXT: br label [[OMP_IF_END]] @@ -916,4 +916,3 @@ // CHECK7-NEXT: call void @_Z3fn3v() // CHECK7-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp b/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp --- a/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp +++ b/clang/test/OpenMP/parallel_if_codegen_PR51349.cpp @@ -17,10 +17,10 @@ // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA3:![0-9]+]] +// CHECK-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp --- a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp @@ -67,7 +67,6 @@ // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -111,6 +110,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -993,7 +993,6 @@ // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -1037,6 +1036,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp --- a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp @@ -70,7 +70,6 @@ // CHECK1-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -114,6 +113,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -915,7 +915,6 @@ // CHECK2-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK2-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -959,6 +958,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -1760,7 +1760,6 @@ // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK3-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -1804,6 +1803,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -2660,7 +2660,6 @@ // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED10:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK4-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK4-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 @@ -2704,6 +2703,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32* [[I]], i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]], i64 [[TMP10]], i64 [[TMP12]]) #[[ATTR2:[0-9]+]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_IF_END]] diff --git a/clang/test/OpenMP/target_codegen_global_capture.cpp b/clang/test/OpenMP/target_codegen_global_capture.cpp --- a/clang/test/OpenMP/target_codegen_global_capture.cpp +++ b/clang/test/OpenMP/target_codegen_global_capture.cpp @@ -365,7 +365,6 @@ // CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8 @@ -417,6 +416,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -678,7 +678,6 @@ // CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8 @@ -730,6 +729,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1010,7 +1010,6 @@ // CHECK1-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK1-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8 @@ -1062,6 +1061,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1307,7 +1307,6 @@ // CHECK2-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8 @@ -1359,6 +1358,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -1620,7 +1620,6 @@ // CHECK2-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8 @@ -1672,6 +1671,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -1952,7 +1952,6 @@ // CHECK2-NEXT: [[SD_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 // CHECK2-NEXT: store i64 [[GB]], i64* [[GB_ADDR]], align 8 @@ -2004,6 +2003,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV6]], double* [[CONV7]], float* [[CONV8]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2237,7 +2237,6 @@ // CHECK3-NEXT: [[GD8:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4 @@ -2295,6 +2294,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -2544,7 +2544,6 @@ // CHECK3-NEXT: [[GD8:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4 @@ -2602,6 +2601,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -2870,7 +2870,6 @@ // CHECK3-NEXT: [[GD8:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK3-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4 @@ -2928,6 +2927,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -3161,7 +3161,6 @@ // CHECK4-NEXT: [[GD8:%.*]] = alloca double, align 8 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4 @@ -3219,6 +3218,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_IF_END]] @@ -3468,7 +3468,6 @@ // CHECK4-NEXT: [[GD8:%.*]] = alloca double, align 8 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4 @@ -3526,6 +3525,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_IF_END]] @@ -3794,7 +3794,6 @@ // CHECK4-NEXT: [[GD8:%.*]] = alloca double, align 8 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 // CHECK4-NEXT: store double* [[GB]], double** [[GB_ADDR]], align 4 @@ -3852,6 +3851,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i16* [[CONV4]], double* [[GD8]], float* [[CONV5]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_IF_END]] @@ -3897,4 +3897,3 @@ // CHECK4-NEXT: call void @__tgt_register_requires(i64 1) // CHECK4-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp --- a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp @@ -7890,7 +7890,6 @@ // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK5-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK5-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -7923,6 +7922,7 @@ // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) #[[ATTR4]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK5-NEXT: br label [[OMP_IF_END]] @@ -9593,7 +9593,6 @@ // CHECK6-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK6-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK6-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -9626,6 +9625,7 @@ // CHECK6: omp_if.else: // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) #[[ATTR4]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK6-NEXT: br label [[OMP_IF_END]] @@ -11263,7 +11263,6 @@ // CHECK7-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK7-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK7-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK7-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -11294,6 +11293,7 @@ // CHECK7: omp_if.else: // CHECK7-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK7-NEXT: call void @.omp_outlined..9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) #[[ATTR4]] // CHECK7-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK7-NEXT: br label [[OMP_IF_END]] @@ -12924,7 +12924,6 @@ // CHECK8-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK8-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK8-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK8-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK8-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -12955,6 +12954,7 @@ // CHECK8: omp_if.else: // CHECK8-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK8-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK8-NEXT: call void @.omp_outlined..9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) #[[ATTR4]] // CHECK8-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK8-NEXT: br label [[OMP_IF_END]] @@ -21312,7 +21312,6 @@ // CHECK21-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK21-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK21-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK21-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK21-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK21-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK21-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -21345,6 +21344,7 @@ // CHECK21: omp_if.else: // CHECK21-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK21-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK21-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK21-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) #[[ATTR2:[0-9]+]] // CHECK21-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK21-NEXT: br label [[OMP_IF_END]] @@ -22240,7 +22240,6 @@ // CHECK22-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK22-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK22-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK22-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK22-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK22-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK22-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -22273,6 +22272,7 @@ // CHECK22: omp_if.else: // CHECK22-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK22-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK22-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK22-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP6]], i64 [[TMP2]], i64 [[TMP3]], i16* [[TMP4]], i64 [[TMP8]]) #[[ATTR2:[0-9]+]] // CHECK22-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK22-NEXT: br label [[OMP_IF_END]] @@ -23150,7 +23150,6 @@ // CHECK23-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK23-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK23-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK23-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK23-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -23181,6 +23180,7 @@ // CHECK23: omp_if.else: // CHECK23-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK23-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK23-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK23-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) #[[ATTR2:[0-9]+]] // CHECK23-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK23-NEXT: br label [[OMP_IF_END]] @@ -24054,7 +24054,6 @@ // CHECK24-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK24-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK24-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK24-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK24-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) // CHECK24-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK24-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -24085,6 +24084,7 @@ // CHECK24: omp_if.else: // CHECK24-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK24-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK24-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK24-NEXT: call void @.omp_outlined..5(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP6]], i32 [[TMP2]], i32 [[TMP3]], i16* [[TMP4]], i32 [[TMP8]]) #[[ATTR2:[0-9]+]] // CHECK24-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) // CHECK24-NEXT: br label [[OMP_IF_END]] diff --git a/clang/test/OpenMP/target_parallel_if_codegen.cpp b/clang/test/OpenMP/target_parallel_if_codegen.cpp --- a/clang/test/OpenMP/target_parallel_if_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_if_codegen.cpp @@ -491,7 +491,6 @@ // CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -512,6 +511,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -547,7 +547,6 @@ // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -562,6 +561,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -590,7 +590,6 @@ // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -603,6 +602,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -644,7 +644,6 @@ // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* @@ -654,6 +653,7 @@ // CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR3]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: ret void @@ -1035,7 +1035,6 @@ // CHECK2-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -1056,6 +1055,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -1091,7 +1091,6 @@ // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -1106,6 +1105,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -1134,7 +1134,6 @@ // CHECK2-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -1147,6 +1146,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -1188,7 +1188,6 @@ // CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* @@ -1198,6 +1197,7 @@ // CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR3]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK2-NEXT: ret void @@ -1576,7 +1576,6 @@ // CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -1595,6 +1594,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -1629,7 +1629,6 @@ // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -1644,6 +1643,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -1672,7 +1672,6 @@ // CHECK3-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -1685,6 +1684,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -1726,7 +1726,6 @@ // CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 @@ -1734,6 +1733,7 @@ // CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR3]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK3-NEXT: ret void @@ -2108,7 +2108,6 @@ // CHECK4-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -2127,6 +2126,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR3]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_IF_END]] @@ -2161,7 +2161,6 @@ // CHECK4-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -2176,6 +2175,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_IF_END]] @@ -2204,7 +2204,6 @@ // CHECK4-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -2217,6 +2216,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: br label [[OMP_IF_END]] @@ -2258,7 +2258,6 @@ // CHECK4-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 @@ -2266,6 +2265,7 @@ // CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR3]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK4-NEXT: ret void @@ -2340,7 +2340,6 @@ // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -2353,6 +2352,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -2396,7 +2396,6 @@ // CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -2417,6 +2416,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -2452,7 +2452,6 @@ // CHECK9-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2467,6 +2466,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -2496,7 +2496,6 @@ // CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK9-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* @@ -2506,6 +2505,7 @@ // CHECK9-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: ret void @@ -2577,7 +2577,6 @@ // CHECK10-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -2590,6 +2589,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK10-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -2633,7 +2633,6 @@ // CHECK10-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -2654,6 +2653,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK10-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -2689,7 +2689,6 @@ // CHECK10-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2704,6 +2703,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK10-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -2733,7 +2733,6 @@ // CHECK10-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK10-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* @@ -2743,6 +2742,7 @@ // CHECK10-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK10-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK10-NEXT: ret void @@ -2814,7 +2814,6 @@ // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK11-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -2827,6 +2826,7 @@ // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: br label [[OMP_IF_END]] @@ -2870,7 +2870,6 @@ // CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -2889,6 +2888,7 @@ // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: br label [[OMP_IF_END]] @@ -2923,7 +2923,6 @@ // CHECK11-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK11-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -2938,6 +2937,7 @@ // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: br label [[OMP_IF_END]] @@ -2967,7 +2967,6 @@ // CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK11-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 @@ -2975,6 +2974,7 @@ // CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: ret void @@ -3042,7 +3042,6 @@ // CHECK12-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK12-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -3055,6 +3054,7 @@ // CHECK12: omp_if.else: // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK12-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK12-NEXT: br label [[OMP_IF_END]] @@ -3098,7 +3098,6 @@ // CHECK12-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK12-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK12-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -3117,6 +3116,7 @@ // CHECK12: omp_if.else: // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK12-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK12-NEXT: br label [[OMP_IF_END]] @@ -3151,7 +3151,6 @@ // CHECK12-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK12-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK12-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -3166,6 +3165,7 @@ // CHECK12: omp_if.else: // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK12-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK12-NEXT: br label [[OMP_IF_END]] @@ -3195,7 +3195,6 @@ // CHECK12-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK12-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 @@ -3203,6 +3202,7 @@ // CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK12-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK12-NEXT: ret void @@ -3573,7 +3573,6 @@ // CHECK17-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -3594,6 +3593,7 @@ // CHECK17: omp_if.else: // CHECK17-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK17-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK17-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR3]] // CHECK17-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK17-NEXT: br label [[OMP_IF_END]] @@ -3629,7 +3629,6 @@ // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -3644,6 +3643,7 @@ // CHECK17: omp_if.else: // CHECK17-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK17-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK17-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]] // CHECK17-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK17-NEXT: br label [[OMP_IF_END]] @@ -3672,7 +3672,6 @@ // CHECK17-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -3685,6 +3684,7 @@ // CHECK17: omp_if.else: // CHECK17-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK17-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK17-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] // CHECK17-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK17-NEXT: br label [[OMP_IF_END]] @@ -3726,7 +3726,6 @@ // CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK17-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK17-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK17-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* @@ -3736,6 +3735,7 @@ // CHECK17-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 // CHECK17-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK17-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK17-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR3]] // CHECK17-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK17-NEXT: ret void @@ -4117,7 +4117,6 @@ // CHECK18-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK18-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -4138,6 +4137,7 @@ // CHECK18: omp_if.else: // CHECK18-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK18-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK18-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR3]] // CHECK18-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK18-NEXT: br label [[OMP_IF_END]] @@ -4173,7 +4173,6 @@ // CHECK18-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK18-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK18-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -4188,6 +4187,7 @@ // CHECK18: omp_if.else: // CHECK18-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK18-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK18-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]] // CHECK18-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK18-NEXT: br label [[OMP_IF_END]] @@ -4216,7 +4216,6 @@ // CHECK18-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK18-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK18-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -4229,6 +4228,7 @@ // CHECK18: omp_if.else: // CHECK18-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK18-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK18-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] // CHECK18-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK18-NEXT: br label [[OMP_IF_END]] @@ -4270,7 +4270,6 @@ // CHECK18-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK18-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK18-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK18-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK18-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK18-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* @@ -4280,6 +4279,7 @@ // CHECK18-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 // CHECK18-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK18-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK18-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR3]] // CHECK18-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK18-NEXT: ret void @@ -4658,7 +4658,6 @@ // CHECK19-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK19-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -4677,6 +4676,7 @@ // CHECK19: omp_if.else: // CHECK19-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK19-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK19-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR3]] // CHECK19-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK19-NEXT: br label [[OMP_IF_END]] @@ -4711,7 +4711,6 @@ // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK19-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -4726,6 +4725,7 @@ // CHECK19: omp_if.else: // CHECK19-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK19-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK19-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]] // CHECK19-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK19-NEXT: br label [[OMP_IF_END]] @@ -4754,7 +4754,6 @@ // CHECK19-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK19-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK19-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -4767,6 +4766,7 @@ // CHECK19: omp_if.else: // CHECK19-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK19-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK19-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] // CHECK19-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK19-NEXT: br label [[OMP_IF_END]] @@ -4808,7 +4808,6 @@ // CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK19-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK19-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 @@ -4816,6 +4815,7 @@ // CHECK19-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 // CHECK19-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK19-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK19-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR3]] // CHECK19-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK19-NEXT: ret void @@ -5190,7 +5190,6 @@ // CHECK20-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK20-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK20-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK20-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK20-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK20-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -5209,6 +5208,7 @@ // CHECK20: omp_if.else: // CHECK20-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK20-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK20-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR3]] // CHECK20-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK20-NEXT: br label [[OMP_IF_END]] @@ -5243,7 +5243,6 @@ // CHECK20-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK20-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK20-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK20-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK20-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK20-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -5258,6 +5257,7 @@ // CHECK20: omp_if.else: // CHECK20-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK20-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK20-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR3]] // CHECK20-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK20-NEXT: br label [[OMP_IF_END]] @@ -5286,7 +5286,6 @@ // CHECK20-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK20-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK20-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK20-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK20-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK20-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -5299,6 +5298,7 @@ // CHECK20: omp_if.else: // CHECK20-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK20-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK20-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR3]] // CHECK20-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK20-NEXT: br label [[OMP_IF_END]] @@ -5340,7 +5340,6 @@ // CHECK20-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK20-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK20-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK20-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK20-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK20-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 @@ -5348,6 +5347,7 @@ // CHECK20-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 // CHECK20-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK20-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK20-NEXT: call void @.omp_outlined..8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR3]] // CHECK20-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK20-NEXT: ret void @@ -5422,7 +5422,6 @@ // CHECK25-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -5435,6 +5434,7 @@ // CHECK25: omp_if.else: // CHECK25-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK25-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK25-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK25-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK25-NEXT: br label [[OMP_IF_END]] @@ -5478,7 +5478,6 @@ // CHECK25-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK25-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -5499,6 +5498,7 @@ // CHECK25: omp_if.else: // CHECK25-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK25-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK25-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR2]] // CHECK25-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK25-NEXT: br label [[OMP_IF_END]] @@ -5534,7 +5534,6 @@ // CHECK25-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK25-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK25-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -5549,6 +5548,7 @@ // CHECK25: omp_if.else: // CHECK25-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK25-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK25-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]] // CHECK25-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK25-NEXT: br label [[OMP_IF_END]] @@ -5578,7 +5578,6 @@ // CHECK25-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK25-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK25-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK25-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK25-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK25-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* @@ -5588,6 +5587,7 @@ // CHECK25-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 // CHECK25-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK25-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK25-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK25-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR2]] // CHECK25-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK25-NEXT: ret void @@ -5659,7 +5659,6 @@ // CHECK26-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK26-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK26-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -5672,6 +5671,7 @@ // CHECK26: omp_if.else: // CHECK26-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK26-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK26-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK26-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK26-NEXT: br label [[OMP_IF_END]] @@ -5715,7 +5715,6 @@ // CHECK26-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK26-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK26-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK26-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 @@ -5736,6 +5735,7 @@ // CHECK26: omp_if.else: // CHECK26-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK26-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK26-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i64 [[TMP3]]) #[[ATTR2]] // CHECK26-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK26-NEXT: br label [[OMP_IF_END]] @@ -5771,7 +5771,6 @@ // CHECK26-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK26-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK26-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK26-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 // CHECK26-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -5786,6 +5785,7 @@ // CHECK26: omp_if.else: // CHECK26-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK26-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK26-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]] // CHECK26-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK26-NEXT: br label [[OMP_IF_END]] @@ -5815,7 +5815,6 @@ // CHECK26-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK26-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK26-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK26-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK26-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK26-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* @@ -5825,6 +5824,7 @@ // CHECK26-NEXT: [[TMP2:%.*]] = load i64, i64* [[A_CASTED]], align 8 // CHECK26-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK26-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK26-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK26-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]]) #[[ATTR2]] // CHECK26-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK26-NEXT: ret void @@ -5896,7 +5896,6 @@ // CHECK27-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK27-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK27-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -5909,6 +5908,7 @@ // CHECK27: omp_if.else: // CHECK27-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK27-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK27-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK27-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK27-NEXT: br label [[OMP_IF_END]] @@ -5952,7 +5952,6 @@ // CHECK27-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK27-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK27-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -5971,6 +5970,7 @@ // CHECK27: omp_if.else: // CHECK27-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK27-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK27-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR2]] // CHECK27-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK27-NEXT: br label [[OMP_IF_END]] @@ -6005,7 +6005,6 @@ // CHECK27-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK27-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK27-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -6020,6 +6019,7 @@ // CHECK27: omp_if.else: // CHECK27-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK27-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK27-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]] // CHECK27-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK27-NEXT: br label [[OMP_IF_END]] @@ -6049,7 +6049,6 @@ // CHECK27-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK27-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK27-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK27-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 @@ -6057,6 +6056,7 @@ // CHECK27-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 // CHECK27-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK27-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK27-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK27-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR2]] // CHECK27-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK27-NEXT: ret void @@ -6124,7 +6124,6 @@ // CHECK28-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK28-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK28-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK28-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK28-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK28-NEXT: [[CONV:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR__ADDR]] to i8* @@ -6137,6 +6136,7 @@ // CHECK28: omp_if.else: // CHECK28-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK28-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK28-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]]) #[[ATTR2:[0-9]+]] // CHECK28-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK28-NEXT: br label [[OMP_IF_END]] @@ -6180,7 +6180,6 @@ // CHECK28-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK28-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK28-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK28-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK28-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK28-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 @@ -6199,6 +6198,7 @@ // CHECK28: omp_if.else: // CHECK28-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK28-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK28-NEXT: call void @.omp_outlined..2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]], i32 [[TMP3]]) #[[ATTR2]] // CHECK28-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK28-NEXT: br label [[OMP_IF_END]] @@ -6233,7 +6233,6 @@ // CHECK28-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i32, align 4 // CHECK28-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK28-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK28-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK28-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 // CHECK28-NEXT: store i32 [[DOTCAPTURE_EXPR_]], i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 @@ -6248,6 +6247,7 @@ // CHECK28: omp_if.else: // CHECK28-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK28-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK28-NEXT: call void @.omp_outlined..3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], %struct.S1* [[TMP1]]) #[[ATTR2]] // CHECK28-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK28-NEXT: br label [[OMP_IF_END]] @@ -6277,7 +6277,6 @@ // CHECK28-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK28-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK28-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK28-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK28-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK28-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4 @@ -6285,6 +6284,7 @@ // CHECK28-NEXT: [[TMP2:%.*]] = load i32, i32* [[A_CASTED]], align 4 // CHECK28-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK28-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK28-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK28-NEXT: call void @.omp_outlined..4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i32 [[TMP2]]) #[[ATTR2]] // CHECK28-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK28-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp @@ -286,7 +286,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -322,6 +321,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -633,7 +633,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -669,6 +668,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -787,7 +787,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -832,6 +831,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1134,7 +1134,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1170,6 +1169,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1288,7 +1288,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -1333,6 +1332,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1610,7 +1610,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1646,6 +1645,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1957,7 +1957,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1993,6 +1992,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2111,7 +2111,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2156,6 +2155,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2458,7 +2458,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2494,6 +2493,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2612,7 +2612,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2657,6 +2656,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2934,7 +2934,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2970,6 +2969,7 @@ // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3281,7 +3281,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3317,6 +3316,7 @@ // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3435,7 +3435,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -3480,6 +3479,7 @@ // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_IF_END]] @@ -3782,7 +3782,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3818,6 +3817,7 @@ // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3936,7 +3936,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -3981,6 +3980,7 @@ // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_IF_END]] @@ -4258,7 +4258,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4294,6 +4293,7 @@ // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4605,7 +4605,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4641,6 +4640,7 @@ // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4759,7 +4759,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -4804,6 +4803,7 @@ // CHECK6: omp_if.else: // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_IF_END]] @@ -5106,7 +5106,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5142,6 +5141,7 @@ // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5260,7 +5260,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -5305,6 +5304,7 @@ // CHECK6: omp_if.else: // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_IF_END]] @@ -5582,7 +5582,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5618,6 +5617,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5929,7 +5929,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5965,6 +5964,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6083,7 +6083,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -6128,6 +6127,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -6430,7 +6430,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -6466,6 +6465,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6584,7 +6584,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -6629,6 +6628,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -6906,7 +6906,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -6942,6 +6941,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7253,7 +7253,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7289,6 +7288,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7407,7 +7407,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -7452,6 +7451,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -7754,7 +7754,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7790,6 +7789,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7908,7 +7908,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -7953,6 +7952,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -8230,7 +8230,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8266,6 +8265,7 @@ // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8577,7 +8577,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8613,6 +8612,7 @@ // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8731,7 +8731,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -8776,6 +8775,7 @@ // CHECK13: omp_if.else: // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_IF_END]] @@ -9078,7 +9078,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9114,6 +9113,7 @@ // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9232,7 +9232,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -9277,6 +9276,7 @@ // CHECK13: omp_if.else: // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_IF_END]] @@ -9554,7 +9554,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9590,6 +9589,7 @@ // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9901,7 +9901,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9937,6 +9936,7 @@ // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10055,7 +10055,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -10100,6 +10099,7 @@ // CHECK14: omp_if.else: // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_IF_END]] @@ -10402,7 +10402,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10438,6 +10437,7 @@ // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10556,7 +10556,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -10601,6 +10600,7 @@ // CHECK14: omp_if.else: // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_IF_END]] @@ -10696,4 +10696,3 @@ // CHECK14-NEXT: call void @__tgt_register_requires(i64 1) // CHECK14-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp @@ -205,26 +205,26 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -289,23 +289,23 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[CONV]], align 8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -341,7 +341,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -366,25 +365,26 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !18 -// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !18 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -446,23 +446,23 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !21 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: call void @_Z9gtid_testv() // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -587,22 +587,22 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -664,23 +664,23 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 -// CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group !27 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: call void @_Z3fn4v() // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -716,7 +716,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -741,25 +740,26 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !30 -// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !30 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -821,23 +821,23 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group !33 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: call void @_Z3fn5v() // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -884,7 +884,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -911,34 +910,35 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 -// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1000,23 +1000,23 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 -// CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: call void @_Z3fn6v() // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1130,22 +1130,22 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !42 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1207,23 +1207,23 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 -// CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group !45 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: call void @_Z3fn1v() // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1259,7 +1259,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1284,25 +1283,26 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !48 -// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !48 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1364,23 +1364,23 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 -// CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group !51 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: call void @_Z3fn2v() // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1427,7 +1427,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -1454,34 +1453,35 @@ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 -// CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1543,23 +1543,23 @@ // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 -// CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group !57 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: call void @_Z3fn3v() // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1679,26 +1679,26 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9 +// CHECK2-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1763,23 +1763,23 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK2-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[CONV]], align 8 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1815,7 +1815,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1840,25 +1839,26 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !18 -// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !18 -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -1920,23 +1920,23 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK2-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !21 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: call void @_Z9gtid_testv() // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2061,22 +2061,22 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2138,23 +2138,23 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 -// CHECK2-NEXT: call void @_Z3fn4v(), !llvm.access.group !27 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: call void @_Z3fn4v() // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2190,7 +2190,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2215,25 +2214,26 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !30 -// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !30 -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2295,23 +2295,23 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 -// CHECK2-NEXT: call void @_Z3fn5v(), !llvm.access.group !33 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: call void @_Z3fn5v() // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2358,7 +2358,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2385,34 +2384,35 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !36 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 -// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2474,23 +2474,23 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 -// CHECK2-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: call void @_Z3fn6v() // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2604,22 +2604,22 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !42 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2681,23 +2681,23 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 -// CHECK2-NEXT: call void @_Z3fn1v(), !llvm.access.group !45 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: call void @_Z3fn1v() // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2733,7 +2733,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2758,25 +2757,26 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !48 -// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !48 -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2838,23 +2838,23 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 -// CHECK2-NEXT: call void @_Z3fn2v(), !llvm.access.group !51 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: call void @_Z3fn2v() // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -2901,7 +2901,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2928,34 +2927,35 @@ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 -// CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3017,23 +3017,23 @@ // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 -// CHECK2-NEXT: call void @_Z3fn3v(), !llvm.access.group !57 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: call void @_Z3fn3v() // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3153,26 +3153,26 @@ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !10 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3237,23 +3237,23 @@ // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK3-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !14 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !10 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3289,7 +3289,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3314,25 +3313,26 @@ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19 -// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !19 -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3394,23 +3394,23 @@ // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !22 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: call void @_Z9gtid_testv() // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3535,22 +3535,22 @@ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !25 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3612,23 +3612,23 @@ // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 -// CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group !28 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: call void @_Z3fn4v() // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -3664,7 +3664,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3700,6 +3699,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3835,8 +3835,6 @@ // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -3867,40 +3865,41 @@ // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !34 -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !34 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK3-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then5: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !34 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 -// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !34 -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END22:%.*]] @@ -3931,6 +3930,7 @@ // CHECK3: omp_if.else16: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 // CHECK3-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_IF_END18]] @@ -4011,23 +4011,23 @@ // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 -// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !38 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: call void @_Z3fn6v() // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] @@ -4139,23 +4139,23 @@ // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42 -// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !42 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: call void @_Z3fn6v() // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] @@ -4312,22 +4312,22 @@ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4389,23 +4389,23 @@ // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 -// CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group !49 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: call void @_Z3fn1v() // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4441,7 +4441,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4477,6 +4476,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4609,7 +4609,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -4636,34 +4635,35 @@ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 -// CHECK3-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK3-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4725,23 +4725,23 @@ // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 -// CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group !57 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: call void @_Z3fn3v() // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4861,26 +4861,26 @@ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !10 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 -// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9 +// CHECK4-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4945,23 +4945,23 @@ // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK4-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !14 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !10 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -4997,7 +4997,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5022,25 +5021,26 @@ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19 -// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !19 -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5102,23 +5102,23 @@ // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK4-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !22 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: call void @_Z9gtid_testv() // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5243,22 +5243,22 @@ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !25 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5320,23 +5320,23 @@ // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 -// CHECK4-NEXT: call void @_Z3fn4v(), !llvm.access.group !28 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: call void @_Z3fn4v() // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -5372,7 +5372,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5408,6 +5407,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5543,8 +5543,6 @@ // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -5575,40 +5573,41 @@ // CHECK4: omp_if.then: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK4-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK4-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !34 -// CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !34 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34 +// CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 +// CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK4-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then5: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !34 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 -// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !34 -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END22:%.*]] @@ -5639,6 +5638,7 @@ // CHECK4: omp_if.else16: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 // CHECK4-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_IF_END18]] @@ -5719,23 +5719,23 @@ // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 -// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !38 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: call void @_Z3fn6v() // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] @@ -5847,23 +5847,23 @@ // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42 -// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !42 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: call void @_Z3fn6v() // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] @@ -6020,22 +6020,22 @@ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6097,23 +6097,23 @@ // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 -// CHECK4-NEXT: call void @_Z3fn1v(), !llvm.access.group !49 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: call void @_Z3fn1v() // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6149,7 +6149,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -6185,6 +6184,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6317,7 +6317,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -6344,34 +6343,35 @@ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 -// CHECK4-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK4-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6433,23 +6433,23 @@ // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 -// CHECK4-NEXT: call void @_Z3fn3v(), !llvm.access.group !57 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: call void @_Z3fn3v() // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -6491,23 +6491,23 @@ // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: store i32 0, i32* @Arg, align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 0, i32* @Arg, align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6517,23 +6517,23 @@ // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK5-NEXT: call void @_Z9gtid_testv() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6567,23 +6567,23 @@ // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: call void @_Z3fn4v() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6593,23 +6593,23 @@ // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12 -// CHECK5-NEXT: call void @_Z3fn5v(), !llvm.access.group !12 +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK5-NEXT: call void @_Z3fn5v() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6623,23 +6623,23 @@ // CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK5: omp.inner.for.cond21: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK5: omp.inner.for.body23: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15 -// CHECK5-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 +// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK5-NEXT: call void @_Z3fn6v() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK5: omp.body.continue26: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK5: omp.inner.for.end29: // CHECK5-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6675,23 +6675,23 @@ // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: call void @_Z3fn1v() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6701,23 +6701,23 @@ // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: call void @_Z3fn2v(), !llvm.access.group !21 +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK5-NEXT: call void @_Z3fn2v() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6731,23 +6731,23 @@ // CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK5: omp.inner.for.cond21: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK5: omp.inner.for.body23: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: call void @_Z3fn3v(), !llvm.access.group !24 +// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK5-NEXT: call void @_Z3fn3v() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK5: omp.body.continue26: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end29: // CHECK5-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6773,23 +6773,23 @@ // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 -// CHECK6-NEXT: store i32 0, i32* @Arg, align 4, !llvm.access.group !2 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 0, i32* @Arg, align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6799,23 +6799,23 @@ // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 -// CHECK6-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK6-NEXT: call void @_Z9gtid_testv() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6849,23 +6849,23 @@ // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK6-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: call void @_Z3fn4v() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6875,23 +6875,23 @@ // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12 -// CHECK6-NEXT: call void @_Z3fn5v(), !llvm.access.group !12 +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK6-NEXT: call void @_Z3fn5v() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6905,23 +6905,23 @@ // CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK6: omp.inner.for.cond21: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK6: omp.inner.for.body23: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15 -// CHECK6-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 +// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK6-NEXT: call void @_Z3fn6v() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK6: omp.body.continue26: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK6: omp.inner.for.inc27: -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK6: omp.inner.for.end29: // CHECK6-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6957,23 +6957,23 @@ // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK6-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: call void @_Z3fn1v() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6983,23 +6983,23 @@ // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21 -// CHECK6-NEXT: call void @_Z3fn2v(), !llvm.access.group !21 +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK6-NEXT: call void @_Z3fn2v() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -7013,23 +7013,23 @@ // CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK6: omp.inner.for.cond21: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK6: omp.inner.for.body23: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24 -// CHECK6-NEXT: call void @_Z3fn3v(), !llvm.access.group !24 +// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK6-NEXT: call void @_Z3fn3v() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK6: omp.body.continue26: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK6: omp.inner.for.inc27: -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK6: omp.inner.for.end29: // CHECK6-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7055,23 +7055,23 @@ // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 -// CHECK7-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !3, !llvm.access.group !2 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !3 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -7081,23 +7081,23 @@ // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK7: omp.inner.for.cond7: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !7 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK7: omp.inner.for.body9: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK7-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !7 -// CHECK7-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !7 +// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK7-NEXT: call void @_Z9gtid_testv() // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK7: omp.body.continue12: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK7: omp.inner.for.inc13: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK7-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 +// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK7: omp.inner.for.end15: // CHECK7-NEXT: store i32 100, i32* [[I6]], align 4 @@ -7131,23 +7131,23 @@ // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: call void @_Z3fn4v(), !llvm.access.group !10 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: call void @_Z3fn4v() // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -7191,23 +7191,23 @@ // CHECK7: omp_if.then: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK7: omp.inner.for.cond22: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK7-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK7-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK7: omp.inner.for.body24: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK7-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !15 -// CHECK7-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 +// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 +// CHECK7-NEXT: call void @_Z3fn6v() // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK7: omp.body.continue27: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK7: omp.inner.for.inc28: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK7: omp.inner.for.end30: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] @@ -7268,23 +7268,23 @@ // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK7-NEXT: call void @_Z3fn1v(), !llvm.access.group !19 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: call void @_Z3fn1v() // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -7324,23 +7324,23 @@ // CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK7: omp.inner.for.cond21: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !23 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK7-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK7-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK7: omp.inner.for.body23: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK7-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !23 -// CHECK7-NEXT: call void @_Z3fn3v(), !llvm.access.group !23 +// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK7-NEXT: call void @_Z3fn3v() // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK7: omp.body.continue26: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK7: omp.inner.for.inc27: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 +// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK7: omp.inner.for.end29: // CHECK7-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7366,23 +7366,23 @@ // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 -// CHECK8-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !3, !llvm.access.group !2 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !3 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7392,23 +7392,23 @@ // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK8: omp.inner.for.cond7: -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !7 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK8: omp.inner.for.body9: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK8-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !7 -// CHECK8-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !7 +// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK8-NEXT: call void @_Z9gtid_testv() // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK8: omp.body.continue12: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK8: omp.inner.for.inc13: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK8-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !7 +// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK8: omp.inner.for.end15: // CHECK8-NEXT: store i32 100, i32* [[I6]], align 4 @@ -7442,23 +7442,23 @@ // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK8-NEXT: call void @_Z3fn4v(), !llvm.access.group !10 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: call void @_Z3fn4v() // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7502,23 +7502,23 @@ // CHECK8: omp_if.then: // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK8: omp.inner.for.cond22: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK8-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK8-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK8: omp.inner.for.body24: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK8-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !15 -// CHECK8-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 +// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 +// CHECK8-NEXT: call void @_Z3fn6v() // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK8: omp.body.continue27: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK8: omp.inner.for.inc28: -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK8: omp.inner.for.end30: // CHECK8-NEXT: br label [[OMP_IF_END:%.*]] @@ -7579,23 +7579,23 @@ // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK8-NEXT: call void @_Z3fn1v(), !llvm.access.group !19 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: call void @_Z3fn1v() // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7635,23 +7635,23 @@ // CHECK8-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK8: omp.inner.for.cond21: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !23 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK8-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK8-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK8: omp.inner.for.body23: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK8-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !23 -// CHECK8-NEXT: call void @_Z3fn3v(), !llvm.access.group !23 +// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK8-NEXT: call void @_Z3fn3v() // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK8: omp.body.continue26: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK8: omp.inner.for.inc27: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !23 +// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK8: omp.inner.for.end29: // CHECK8-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7755,26 +7755,26 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13 +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7839,23 +7839,23 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK9-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !17 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[CONV]], align 8 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7891,7 +7891,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7916,25 +7915,26 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !22 -// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !22 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -7996,23 +7996,23 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 -// CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !25 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK9-NEXT: call void @_Z9gtid_testv() // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8137,22 +8137,22 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !28 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8214,23 +8214,23 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 -// CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group !31 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK9-NEXT: call void @_Z3fn4v() // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8266,7 +8266,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8291,25 +8290,26 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 -// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !34 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8371,23 +8371,23 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 -// CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group !37 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK9-NEXT: call void @_Z3fn5v() // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8434,7 +8434,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -8461,34 +8460,35 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !40 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !40 -// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !40 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8550,23 +8550,23 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 -// CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK9-NEXT: call void @_Z3fn6v() // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8680,22 +8680,22 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8757,23 +8757,23 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 -// CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group !49 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK9-NEXT: call void @_Z3fn1v() // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8809,7 +8809,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8834,25 +8833,26 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !52 -// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !52 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8914,23 +8914,23 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 -// CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group !55 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK9-NEXT: call void @_Z3fn2v() // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -8977,7 +8977,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -9004,34 +9003,35 @@ // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 -// CHECK9-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK9-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9093,23 +9093,23 @@ // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 -// CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group !61 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK9-NEXT: call void @_Z3fn3v() // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9229,26 +9229,26 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 -// CHECK10-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13 +// CHECK10-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9313,23 +9313,23 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 -// CHECK10-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !17 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[CONV]], align 8 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9365,7 +9365,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9390,25 +9389,26 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !22 -// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !22 -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9470,23 +9470,23 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 -// CHECK10-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !25 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK10-NEXT: call void @_Z9gtid_testv() // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9611,22 +9611,22 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !28 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9688,23 +9688,23 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 -// CHECK10-NEXT: call void @_Z3fn4v(), !llvm.access.group !31 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK10-NEXT: call void @_Z3fn4v() // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9740,7 +9740,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9765,25 +9764,26 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 -// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !34 -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9845,23 +9845,23 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 -// CHECK10-NEXT: call void @_Z3fn5v(), !llvm.access.group !37 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK10-NEXT: call void @_Z3fn5v() // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -9908,7 +9908,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -9935,34 +9934,35 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !40 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !40 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !40 -// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !40 -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10024,23 +10024,23 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 -// CHECK10-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK10-NEXT: call void @_Z3fn6v() // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10154,22 +10154,22 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10231,23 +10231,23 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 -// CHECK10-NEXT: call void @_Z3fn1v(), !llvm.access.group !49 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK10-NEXT: call void @_Z3fn1v() // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10283,7 +10283,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10308,25 +10307,26 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !52 -// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !52 -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10388,23 +10388,23 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 -// CHECK10-NEXT: call void @_Z3fn2v(), !llvm.access.group !55 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK10-NEXT: call void @_Z3fn2v() // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10451,7 +10451,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -10478,34 +10477,35 @@ // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 -// CHECK10-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK10-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10567,23 +10567,23 @@ // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 -// CHECK10-NEXT: call void @_Z3fn3v(), !llvm.access.group !61 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK10-NEXT: call void @_Z3fn3v() // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10703,26 +10703,26 @@ // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !14 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK11-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10787,23 +10787,23 @@ // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK11-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !18 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !14 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10839,7 +10839,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10864,25 +10863,26 @@ // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !23 -// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !23 -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -10944,23 +10944,23 @@ // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 -// CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !26 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK11-NEXT: call void @_Z9gtid_testv() // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11085,22 +11085,22 @@ // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !29 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11162,23 +11162,23 @@ // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 -// CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group !32 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK11-NEXT: call void @_Z3fn4v() // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11214,7 +11214,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -11250,6 +11249,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11385,8 +11385,6 @@ // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -11417,40 +11415,41 @@ // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !38 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !38 -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK11-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then5: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !38 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 -// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !38 -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END22:%.*]] @@ -11481,6 +11480,7 @@ // CHECK11: omp_if.else16: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 // CHECK11-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_IF_END18]] @@ -11561,23 +11561,23 @@ // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42 -// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !42 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK11-NEXT: call void @_Z3fn6v() // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] @@ -11689,23 +11689,23 @@ // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 -// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !46 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK11-NEXT: call void @_Z3fn6v() // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] @@ -11862,22 +11862,22 @@ // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !50 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11939,23 +11939,23 @@ // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !53 -// CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group !53 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK11-NEXT: call void @_Z3fn1v() // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -11991,7 +11991,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -12027,6 +12026,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12159,7 +12159,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -12186,34 +12185,35 @@ // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 -// CHECK11-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK11-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12275,23 +12275,23 @@ // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 -// CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group !61 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK11-NEXT: call void @_Z3fn3v() // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12411,26 +12411,26 @@ // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !14 // CHECK12-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK12-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 -// CHECK12-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13 +// CHECK12-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12495,23 +12495,23 @@ // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK12-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !18 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !14 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12547,7 +12547,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -12572,25 +12571,26 @@ // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !23 -// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !23 -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12652,23 +12652,23 @@ // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 -// CHECK12-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !26 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK12-NEXT: call void @_Z9gtid_testv() // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12793,22 +12793,22 @@ // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !29 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12870,23 +12870,23 @@ // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 -// CHECK12-NEXT: call void @_Z3fn4v(), !llvm.access.group !32 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK12-NEXT: call void @_Z3fn4v() // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -12922,7 +12922,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -12958,6 +12957,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -13093,8 +13093,6 @@ // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -13125,40 +13123,41 @@ // CHECK12: omp_if.then: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK12-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK12-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK12-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !38 -// CHECK12-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !38 -// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 +// CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 +// CHECK12-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK12-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then5: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !38 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 -// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !38 -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END22:%.*]] @@ -13189,6 +13188,7 @@ // CHECK12: omp_if.else16: // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 // CHECK12-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_IF_END18]] @@ -13269,23 +13269,23 @@ // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42 -// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !42 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK12-NEXT: call void @_Z3fn6v() // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] @@ -13397,23 +13397,23 @@ // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 -// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !46 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK12-NEXT: call void @_Z3fn6v() // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] @@ -13570,22 +13570,22 @@ // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !50 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13647,23 +13647,23 @@ // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !53 -// CHECK12-NEXT: call void @_Z3fn1v(), !llvm.access.group !53 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK12-NEXT: call void @_Z3fn1v() // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13699,7 +13699,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -13735,6 +13734,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -13867,7 +13867,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -13894,34 +13893,35 @@ // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58 +// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 -// CHECK12-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 +// CHECK12-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -13983,23 +13983,23 @@ // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 -// CHECK12-NEXT: call void @_Z3fn3v(), !llvm.access.group !61 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK12-NEXT: call void @_Z3fn3v() // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] @@ -14041,23 +14041,23 @@ // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: store i32 0, i32* @Arg, align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK13-NEXT: store i32 0, i32* @Arg, align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -14067,23 +14067,23 @@ // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK13-NEXT: call void @_Z9gtid_testv() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14117,23 +14117,23 @@ // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK13-NEXT: call void @_Z3fn4v() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -14143,23 +14143,23 @@ // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16 -// CHECK13-NEXT: call void @_Z3fn5v(), !llvm.access.group !16 +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK13-NEXT: call void @_Z3fn5v() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14173,23 +14173,23 @@ // CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK13: omp.inner.for.cond21: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK13: omp.inner.for.body23: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19 -// CHECK13-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 +// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK13-NEXT: call void @_Z3fn6v() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK13: omp.body.continue26: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK13: omp.inner.for.inc27: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK13: omp.inner.for.end29: // CHECK13-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14225,23 +14225,23 @@ // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK13-NEXT: call void @_Z3fn1v() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -14251,23 +14251,23 @@ // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: call void @_Z3fn2v(), !llvm.access.group !25 +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK13-NEXT: call void @_Z3fn2v() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14281,23 +14281,23 @@ // CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK13: omp.inner.for.cond21: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK13: omp.inner.for.body23: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: call void @_Z3fn3v(), !llvm.access.group !28 +// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK13-NEXT: call void @_Z3fn3v() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK13: omp.body.continue26: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK13: omp.inner.for.inc27: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK13: omp.inner.for.end29: // CHECK13-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14323,23 +14323,23 @@ // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 -// CHECK14-NEXT: store i32 0, i32* @Arg, align 4, !llvm.access.group !6 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK14-NEXT: store i32 0, i32* @Arg, align 4 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -14349,23 +14349,23 @@ // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 -// CHECK14-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK14-NEXT: call void @_Z9gtid_testv() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14399,23 +14399,23 @@ // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK14-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK14-NEXT: call void @_Z3fn4v() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -14425,23 +14425,23 @@ // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16 -// CHECK14-NEXT: call void @_Z3fn5v(), !llvm.access.group !16 +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK14-NEXT: call void @_Z3fn5v() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14455,23 +14455,23 @@ // CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK14: omp.inner.for.cond21: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK14: omp.inner.for.body23: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19 -// CHECK14-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 +// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK14-NEXT: call void @_Z3fn6v() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK14: omp.body.continue26: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK14: omp.inner.for.inc27: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK14: omp.inner.for.end29: // CHECK14-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14507,23 +14507,23 @@ // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK14-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK14-NEXT: call void @_Z3fn1v() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -14533,23 +14533,23 @@ // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25 -// CHECK14-NEXT: call void @_Z3fn2v(), !llvm.access.group !25 +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK14-NEXT: call void @_Z3fn2v() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14563,23 +14563,23 @@ // CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK14: omp.inner.for.cond21: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK14: omp.inner.for.body23: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: call void @_Z3fn3v(), !llvm.access.group !28 +// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK14-NEXT: call void @_Z3fn3v() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK14: omp.body.continue26: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK14: omp.inner.for.inc27: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK14: omp.inner.for.end29: // CHECK14-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14605,23 +14605,23 @@ // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 -// CHECK15-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !7, !llvm.access.group !6 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK15-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !7 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14631,23 +14631,23 @@ // CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK15: omp.inner.for.cond7: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK15: omp.inner.for.body9: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK15-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK15-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !11 -// CHECK15-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !11 +// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK15-NEXT: call void @_Z9gtid_testv() // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK15: omp.body.continue12: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK15: omp.inner.for.inc13: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK15-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 +// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK15: omp.inner.for.end15: // CHECK15-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14681,23 +14681,23 @@ // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK15-NEXT: call void @_Z3fn4v(), !llvm.access.group !14 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK15-NEXT: call void @_Z3fn4v() // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14741,23 +14741,23 @@ // CHECK15: omp_if.then: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK15: omp.inner.for.cond22: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK15-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK15: omp.inner.for.body24: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK15-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !19 -// CHECK15-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 +// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 +// CHECK15-NEXT: call void @_Z3fn6v() // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK15: omp.body.continue27: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK15: omp.inner.for.inc28: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end30: // CHECK15-NEXT: br label [[OMP_IF_END:%.*]] @@ -14818,23 +14818,23 @@ // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 -// CHECK15-NEXT: call void @_Z3fn1v(), !llvm.access.group !23 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK15-NEXT: call void @_Z3fn1v() // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14874,23 +14874,23 @@ // CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK15: omp.inner.for.cond21: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !27 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK15-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK15: omp.inner.for.body23: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK15-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !27 -// CHECK15-NEXT: call void @_Z3fn3v(), !llvm.access.group !27 +// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK15-NEXT: call void @_Z3fn3v() // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK15: omp.body.continue26: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK15: omp.inner.for.inc27: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 +// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK15: omp.inner.for.end29: // CHECK15-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14916,23 +14916,23 @@ // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 -// CHECK16-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !7, !llvm.access.group !6 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK16-NEXT: store i32 0, i32* @Arg, align 4, !nontemporal !7 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14942,23 +14942,23 @@ // CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK16: omp.inner.for.cond7: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK16-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK16: omp.inner.for.body9: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK16-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK16-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !11 -// CHECK16-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !11 +// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK16-NEXT: call void @_Z9gtid_testv() // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK16: omp.body.continue12: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK16: omp.inner.for.inc13: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK16-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !11 +// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK16: omp.inner.for.end15: // CHECK16-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14992,23 +14992,23 @@ // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK16-NEXT: call void @_Z3fn4v(), !llvm.access.group !14 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK16-NEXT: call void @_Z3fn4v() // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -15052,23 +15052,23 @@ // CHECK16: omp_if.then: // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK16: omp.inner.for.cond22: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK16-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK16-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK16: omp.inner.for.body24: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK16-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !19 -// CHECK16-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 +// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 +// CHECK16-NEXT: call void @_Z3fn6v() // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK16: omp.body.continue27: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK16: omp.inner.for.inc28: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK16: omp.inner.for.end30: // CHECK16-NEXT: br label [[OMP_IF_END:%.*]] @@ -15129,23 +15129,23 @@ // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 -// CHECK16-NEXT: call void @_Z3fn1v(), !llvm.access.group !23 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK16-NEXT: call void @_Z3fn1v() // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -15185,23 +15185,23 @@ // CHECK16-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK16: omp.inner.for.cond21: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !27 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK16-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK16-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK16: omp.inner.for.body23: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK16-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !27 -// CHECK16-NEXT: call void @_Z3fn3v(), !llvm.access.group !27 +// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK16-NEXT: call void @_Z3fn3v() // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK16: omp.body.continue26: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK16: omp.inner.for.inc27: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !27 +// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK16: omp.inner.for.end29: // CHECK16-NEXT: store i32 100, i32* [[I20]], align 4 diff --git a/clang/test/OpenMP/task_codegen.c b/clang/test/OpenMP/task_codegen.c --- a/clang/test/OpenMP/task_codegen.c +++ b/clang/test/OpenMP/task_codegen.c @@ -28,8 +28,6 @@ // CHECK: [[DEPOBJ_SIZE_ADDR1:%.+]] = alloca i64, // CHECK: = alloca i64, // CHECK: [[DEP_COUNTER_ADDR:%.+]] = alloca i64, - // CHECK-DAG: store i64 0, i64* [[DEPOBJ_SIZE_ADDR1]], - // CHECK-DAG: store i64 0, i64* [[DEPOBJ_SIZE_ADDR]], // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num( // CHECK: [[ALLOC:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 [[GTID]], i32 65, i64 48, i64 0, i32 (i32, i8*)* bitcast (i32 (i32, [[PRIVATES_TY:%.+]]*)* [[TASK_ENTRY:@.+]] to i32 (i32, i8*)*)) // CHECK: [[EVT_VAL:%.+]] = call i8* @__kmpc_task_allow_completion_event(%struct.ident_t* @{{.+}}, i32 [[GTID]], i8* [[ALLOC]]) @@ -41,6 +39,7 @@ // CHECK: [[D_DEP_BASE:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[D_DEP]], i{{.+}} -1 // CHECK: [[D_DEP_BASE_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[D_DEP_BASE]], i{{.+}} 0, i{{.+}} 0 // CHECK: [[SIZE1:%.+]] = load i64, i64* [[D_DEP_BASE_SIZE]], + // CHECK-DAG: store i64 0, i64* [[DEPOBJ_SIZE_ADDR]], // CHECK: [[SZ:%.+]] = load i64, i64* [[DEPOBJ_SIZE_ADDR]], // CHECK: [[SIZE:%.+]] = add nuw i64 [[SZ]], [[SIZE1]] // CHECK: store i64 [[SIZE]], i64* [[DEPOBJ_SIZE_ADDR]], @@ -49,6 +48,7 @@ // CHECK: [[X_DEP_BASE:%.+]] = getelementptr %struct.kmp_depend_info, %struct.kmp_depend_info* [[X_DEP]], i{{.+}} -1 // CHECK: [[X_DEP_BASE_SIZE:%.+]] = getelementptr inbounds %struct.kmp_depend_info, %struct.kmp_depend_info* [[X_DEP_BASE]], i{{.+}} 0, i{{.+}} 0 // CHECK: [[SIZE2:%.+]] = load i64, i64* [[X_DEP_BASE_SIZE]], + // CHECK-DAG: store i64 0, i64* [[DEPOBJ_SIZE_ADDR1]], // CHECK: [[SZ:%.+]] = load i64, i64* [[DEPOBJ_SIZE_ADDR1]], // CHECK: [[SIZE3:%.+]] = add nuw i64 [[SZ]], [[SIZE2]] // CHECK: store i64 [[SIZE3]], i64* [[DEPOBJ_SIZE_ADDR1]], diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp @@ -294,7 +294,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -330,6 +329,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -638,7 +638,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -674,6 +673,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -797,7 +797,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -842,6 +841,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1150,7 +1150,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1186,6 +1185,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1309,7 +1309,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -1354,6 +1353,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1631,7 +1631,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1667,6 +1666,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1975,7 +1975,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2011,6 +2010,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2134,7 +2134,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2179,6 +2178,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2487,7 +2487,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2523,6 +2522,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2646,7 +2646,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2691,6 +2690,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2968,7 +2968,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3004,6 +3003,7 @@ // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3312,7 +3312,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3348,6 +3347,7 @@ // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3471,7 +3471,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -3516,6 +3515,7 @@ // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_IF_END]] @@ -3824,7 +3824,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3860,6 +3859,7 @@ // CHECK5-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3983,7 +3983,6 @@ // CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK5-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -4028,6 +4027,7 @@ // CHECK5: omp_if.else: // CHECK5-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK5-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK5-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK5-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK5-NEXT: br label [[OMP_IF_END]] @@ -4305,7 +4305,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4341,6 +4340,7 @@ // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4649,7 +4649,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4685,6 +4684,7 @@ // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4808,7 +4808,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -4853,6 +4852,7 @@ // CHECK6: omp_if.else: // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_IF_END]] @@ -5161,7 +5161,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5197,6 +5196,7 @@ // CHECK6-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5320,7 +5320,6 @@ // CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK6-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK6-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -5365,6 +5364,7 @@ // CHECK6: omp_if.else: // CHECK6-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK6-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK6-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK6-NEXT: br label [[OMP_IF_END]] @@ -5642,7 +5642,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5678,6 +5677,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5986,7 +5986,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -6022,6 +6021,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6145,7 +6145,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -6190,6 +6189,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -6498,7 +6498,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -6534,6 +6533,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6657,7 +6657,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -6702,6 +6701,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -6979,7 +6979,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7015,6 +7014,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7323,7 +7323,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7359,6 +7358,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7482,7 +7482,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -7527,6 +7526,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -7835,7 +7835,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7871,6 +7870,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7994,7 +7994,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -8039,6 +8038,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -8316,7 +8316,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8352,6 +8351,7 @@ // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8660,7 +8660,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8696,6 +8695,7 @@ // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8819,7 +8819,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -8864,6 +8863,7 @@ // CHECK13: omp_if.else: // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_IF_END]] @@ -9172,7 +9172,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9208,6 +9207,7 @@ // CHECK13-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9331,7 +9331,6 @@ // CHECK13-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK13-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -9376,6 +9375,7 @@ // CHECK13: omp_if.else: // CHECK13-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK13-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK13-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK13-NEXT: br label [[OMP_IF_END]] @@ -9653,7 +9653,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9689,6 +9688,7 @@ // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9997,7 +9997,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10033,6 +10032,7 @@ // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10156,7 +10156,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -10201,6 +10200,7 @@ // CHECK14: omp_if.else: // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_IF_END]] @@ -10509,7 +10509,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10545,6 +10544,7 @@ // CHECK14-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10668,7 +10668,6 @@ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -10713,6 +10712,7 @@ // CHECK14: omp_if.else: // CHECK14-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK14-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK14-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK14-NEXT: br label [[OMP_IF_END]] @@ -10808,4 +10808,3 @@ // CHECK14-NEXT: call void @__tgt_register_requires(i64 1) // CHECK14-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp @@ -193,7 +193,7 @@ // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -270,7 +270,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -305,7 +305,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -341,6 +340,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -349,7 +349,7 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -427,7 +427,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -564,7 +564,7 @@ // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -642,7 +642,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -677,7 +677,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -713,6 +712,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -721,7 +721,7 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -799,7 +799,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -850,7 +850,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -895,6 +894,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -905,7 +905,7 @@ // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -983,7 +983,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1118,7 +1118,7 @@ // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1196,7 +1196,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1231,7 +1231,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1267,6 +1266,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1275,7 +1275,7 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1353,7 +1353,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1404,7 +1404,6 @@ // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -1449,6 +1448,7 @@ // CHECK1: omp_if.else: // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK1-NEXT: br label [[OMP_IF_END]] @@ -1459,7 +1459,7 @@ // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1537,7 +1537,7 @@ // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1642,7 +1642,7 @@ // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1719,7 +1719,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1754,7 +1754,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -1790,6 +1789,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -1798,7 +1798,7 @@ // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1876,7 +1876,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2013,7 +2013,7 @@ // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2091,7 +2091,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2126,7 +2126,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2162,6 +2161,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2170,7 +2170,7 @@ // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2248,7 +2248,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2299,7 +2299,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2344,6 +2343,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2354,7 +2354,7 @@ // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2432,7 +2432,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2567,7 +2567,7 @@ // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2645,7 +2645,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2680,7 +2680,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -2716,6 +2715,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -2724,7 +2724,7 @@ // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2802,7 +2802,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2853,7 +2853,6 @@ // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK2-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -2898,6 +2897,7 @@ // CHECK2: omp_if.else: // CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK2-NEXT: br label [[OMP_IF_END]] @@ -2908,7 +2908,7 @@ // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2986,7 +2986,7 @@ // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -3091,7 +3091,7 @@ // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3168,7 +3168,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3203,7 +3203,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3239,6 +3238,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3247,7 +3247,7 @@ // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3325,7 +3325,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3462,7 +3462,7 @@ // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3540,7 +3540,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3575,7 +3575,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -3611,6 +3610,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -3619,7 +3619,7 @@ // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3697,7 +3697,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3751,8 +3751,6 @@ // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -3807,6 +3805,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -3817,7 +3816,7 @@ // CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END22:%.*]] // CHECK3: omp_if.else6: @@ -3847,6 +3846,7 @@ // CHECK3: omp_if.else16: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 // CHECK3-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_IF_END18]] @@ -3857,7 +3857,7 @@ // CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK3-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK3: omp.inner.for.end21: // CHECK3-NEXT: br label [[OMP_IF_END22]] // CHECK3: omp_if.end22: @@ -3944,7 +3944,7 @@ // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: @@ -3983,7 +3983,7 @@ // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -4072,7 +4072,7 @@ // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: @@ -4111,7 +4111,7 @@ // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -4250,7 +4250,7 @@ // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4328,7 +4328,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4363,7 +4363,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4399,6 +4398,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4407,7 +4407,7 @@ // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4485,7 +4485,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4536,7 +4536,6 @@ // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK3-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -4581,6 +4580,7 @@ // CHECK3: omp_if.else: // CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK3-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK3-NEXT: br label [[OMP_IF_END]] @@ -4591,7 +4591,7 @@ // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4669,7 +4669,7 @@ // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4774,7 +4774,7 @@ // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4851,7 +4851,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4886,7 +4886,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -4922,6 +4921,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4930,7 +4930,7 @@ // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5008,7 +5008,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5145,7 +5145,7 @@ // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5223,7 +5223,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5258,7 +5258,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -5294,6 +5293,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -5302,7 +5302,7 @@ // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5380,7 +5380,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5434,8 +5434,6 @@ // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -5490,6 +5488,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_IF_END]] @@ -5500,7 +5499,7 @@ // CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END22:%.*]] // CHECK4: omp_if.else6: @@ -5530,6 +5529,7 @@ // CHECK4: omp_if.else16: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 // CHECK4-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_IF_END18]] @@ -5540,7 +5540,7 @@ // CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK4-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK4: omp.inner.for.end21: // CHECK4-NEXT: br label [[OMP_IF_END22]] // CHECK4: omp_if.end22: @@ -5627,7 +5627,7 @@ // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: @@ -5666,7 +5666,7 @@ // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK4: omp.inner.for.end18: // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: @@ -5755,7 +5755,7 @@ // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: @@ -5794,7 +5794,7 @@ // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK4: omp.inner.for.end18: // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: @@ -5933,7 +5933,7 @@ // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6011,7 +6011,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6046,7 +6046,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -6082,6 +6081,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6090,7 +6090,7 @@ // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6168,7 +6168,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6219,7 +6219,6 @@ // CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK4-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -6264,6 +6263,7 @@ // CHECK4: omp_if.else: // CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK4-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK4-NEXT: br label [[OMP_IF_END]] @@ -6274,7 +6274,7 @@ // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6352,7 +6352,7 @@ // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6393,22 +6393,22 @@ // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6418,23 +6418,23 @@ // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 -// CHECK5-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK5-NEXT: call void @_Z9gtid_testv() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6468,23 +6468,23 @@ // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: call void @_Z3fn4v() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6494,23 +6494,23 @@ // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12 -// CHECK5-NEXT: call void @_Z3fn5v(), !llvm.access.group !12 +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK5-NEXT: call void @_Z3fn5v() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6524,23 +6524,23 @@ // CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK5: omp.inner.for.cond21: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK5: omp.inner.for.body23: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15 -// CHECK5-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 +// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK5-NEXT: call void @_Z3fn6v() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK5: omp.body.continue26: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK5: omp.inner.for.end29: // CHECK5-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6576,23 +6576,23 @@ // CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: call void @_Z3fn1v() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: store i32 100, i32* [[I]], align 4 @@ -6602,23 +6602,23 @@ // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK5: omp.inner.for.cond7: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK5: omp.inner.for.body9: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: call void @_Z3fn2v(), !llvm.access.group !21 +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK5-NEXT: call void @_Z3fn2v() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK5: omp.body.continue12: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK5: omp.inner.for.inc13: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end15: // CHECK5-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6632,23 +6632,23 @@ // CHECK5-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK5: omp.inner.for.cond21: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK5-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK5: omp.inner.for.body23: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24 -// CHECK5-NEXT: call void @_Z3fn3v(), !llvm.access.group !24 +// CHECK5-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK5-NEXT: call void @_Z3fn3v() // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK5: omp.body.continue26: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK5: omp.inner.for.inc27: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end29: // CHECK5-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6674,22 +6674,22 @@ // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6699,23 +6699,23 @@ // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 -// CHECK6-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK6-NEXT: call void @_Z9gtid_testv() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6749,23 +6749,23 @@ // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK6-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: call void @_Z3fn4v() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6775,23 +6775,23 @@ // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !12 -// CHECK6-NEXT: call void @_Z3fn5v(), !llvm.access.group !12 +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK6-NEXT: call void @_Z3fn5v() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6805,23 +6805,23 @@ // CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK6: omp.inner.for.cond21: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK6: omp.inner.for.body23: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !15 -// CHECK6-NEXT: call void @_Z3fn6v(), !llvm.access.group !15 +// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK6-NEXT: call void @_Z3fn6v() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK6: omp.body.continue26: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK6: omp.inner.for.inc27: -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK6: omp.inner.for.end29: // CHECK6-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6857,23 +6857,23 @@ // CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK6-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: call void @_Z3fn1v() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: store i32 100, i32* [[I]], align 4 @@ -6883,23 +6883,23 @@ // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK6: omp.inner.for.cond7: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK6: omp.inner.for.body9: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !21 -// CHECK6-NEXT: call void @_Z3fn2v(), !llvm.access.group !21 +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK6-NEXT: call void @_Z3fn2v() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK6: omp.body.continue12: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK6: omp.inner.for.inc13: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK6: omp.inner.for.end15: // CHECK6-NEXT: store i32 100, i32* [[I6]], align 4 @@ -6913,23 +6913,23 @@ // CHECK6-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK6: omp.inner.for.cond21: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK6-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK6-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK6: omp.inner.for.body23: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK6-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !24 -// CHECK6-NEXT: call void @_Z3fn3v(), !llvm.access.group !24 +// CHECK6-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK6-NEXT: call void @_Z3fn3v() // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK6: omp.body.continue26: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK6: omp.inner.for.inc27: -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK6: omp.inner.for.end29: // CHECK6-NEXT: store i32 100, i32* [[I20]], align 4 @@ -6955,22 +6955,22 @@ // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -6980,23 +6980,23 @@ // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK7: omp.inner.for.cond7: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK7: omp.inner.for.body9: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK7-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 -// CHECK7-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 +// CHECK7-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK7-NEXT: call void @_Z9gtid_testv() // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK7: omp.body.continue12: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK7: omp.inner.for.inc13: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK7-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK7-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK7: omp.inner.for.end15: // CHECK7-NEXT: store i32 100, i32* [[I6]], align 4 @@ -7030,23 +7030,23 @@ // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK7-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: call void @_Z3fn4v() // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -7090,23 +7090,23 @@ // CHECK7: omp_if.then: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK7: omp.inner.for.cond22: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK7-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK7-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK7: omp.inner.for.body24: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK7-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !14 -// CHECK7-NEXT: call void @_Z3fn6v(), !llvm.access.group !14 +// CHECK7-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 +// CHECK7-NEXT: call void @_Z3fn6v() // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK7: omp.body.continue27: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK7: omp.inner.for.inc28: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK7: omp.inner.for.end30: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] @@ -7167,23 +7167,23 @@ // CHECK7-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: call void @_Z3fn1v() // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: store i32 100, i32* [[I]], align 4 @@ -7223,23 +7223,23 @@ // CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK7: omp.inner.for.cond21: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK7-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK7-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK7: omp.inner.for.body23: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK7-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !22 -// CHECK7-NEXT: call void @_Z3fn3v(), !llvm.access.group !22 +// CHECK7-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK7-NEXT: call void @_Z3fn3v() // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK7: omp.body.continue26: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK7: omp.inner.for.inc27: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK7: omp.inner.for.end29: // CHECK7-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7265,22 +7265,22 @@ // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7290,23 +7290,23 @@ // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK8: omp.inner.for.cond7: -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK8: omp.inner.for.body9: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK8-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !6 -// CHECK8-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !6 +// CHECK8-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK8-NEXT: call void @_Z9gtid_testv() // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK8: omp.body.continue12: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK8: omp.inner.for.inc13: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK8-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !6 +// CHECK8-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK8: omp.inner.for.end15: // CHECK8-NEXT: store i32 100, i32* [[I6]], align 4 @@ -7340,23 +7340,23 @@ // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK8-NEXT: call void @_Z3fn4v(), !llvm.access.group !9 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: call void @_Z3fn4v() // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7400,23 +7400,23 @@ // CHECK8: omp_if.then: // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK8: omp.inner.for.cond22: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK8-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK8-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK8: omp.inner.for.body24: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK8-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !14 -// CHECK8-NEXT: call void @_Z3fn6v(), !llvm.access.group !14 +// CHECK8-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 +// CHECK8-NEXT: call void @_Z3fn6v() // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK8: omp.body.continue27: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK8: omp.inner.for.inc28: -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK8: omp.inner.for.end30: // CHECK8-NEXT: br label [[OMP_IF_END:%.*]] @@ -7477,23 +7477,23 @@ // CHECK8-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK8-NEXT: call void @_Z3fn1v(), !llvm.access.group !18 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: call void @_Z3fn1v() // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: store i32 100, i32* [[I]], align 4 @@ -7533,23 +7533,23 @@ // CHECK8-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK8: omp.inner.for.cond21: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK8-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK8-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK8: omp.inner.for.body23: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK8-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !22 -// CHECK8-NEXT: call void @_Z3fn3v(), !llvm.access.group !22 +// CHECK8-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK8-NEXT: call void @_Z3fn3v() // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK8: omp.body.continue26: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK8: omp.inner.for.inc27: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK8: omp.inner.for.end29: // CHECK8-NEXT: store i32 100, i32* [[I20]], align 4 @@ -7639,7 +7639,7 @@ // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7716,7 +7716,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7751,7 +7751,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -7787,6 +7786,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7795,7 +7795,7 @@ // CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7873,7 +7873,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8010,7 +8010,7 @@ // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8088,7 +8088,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8123,7 +8123,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8159,6 +8158,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8167,7 +8167,7 @@ // CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8245,7 +8245,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8296,7 +8296,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -8341,6 +8340,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -8351,7 +8351,7 @@ // CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8429,7 +8429,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8564,7 +8564,7 @@ // CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8642,7 +8642,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8677,7 +8677,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -8713,6 +8712,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -8721,7 +8721,7 @@ // CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8799,7 +8799,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8850,7 +8850,6 @@ // CHECK9-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -8895,6 +8894,7 @@ // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK9-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK9-NEXT: br label [[OMP_IF_END]] @@ -8905,7 +8905,7 @@ // CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8983,7 +8983,7 @@ // CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -9088,7 +9088,7 @@ // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9165,7 +9165,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9200,7 +9200,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9236,6 +9235,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9244,7 +9244,7 @@ // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9322,7 +9322,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9459,7 +9459,7 @@ // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9537,7 +9537,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9572,7 +9572,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -9608,6 +9607,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9616,7 +9616,7 @@ // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9694,7 +9694,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9745,7 +9745,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -9790,6 +9789,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -9800,7 +9800,7 @@ // CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9878,7 +9878,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10013,7 +10013,7 @@ // CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10091,7 +10091,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10126,7 +10126,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10162,6 +10161,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..13(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10170,7 +10170,7 @@ // CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10248,7 +10248,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10299,7 +10299,6 @@ // CHECK10-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK10-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK10-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -10344,6 +10343,7 @@ // CHECK10: omp_if.else: // CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK10-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK10-NEXT: br label [[OMP_IF_END]] @@ -10354,7 +10354,7 @@ // CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10432,7 +10432,7 @@ // CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10537,7 +10537,7 @@ // CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10614,7 +10614,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10649,7 +10649,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -10685,6 +10684,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -10693,7 +10693,7 @@ // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10771,7 +10771,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10908,7 +10908,7 @@ // CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10986,7 +10986,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11021,7 +11021,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -11057,6 +11056,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11065,7 +11065,7 @@ // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11143,7 +11143,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11197,8 +11197,6 @@ // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -11253,6 +11251,7 @@ // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_IF_END]] @@ -11263,7 +11262,7 @@ // CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END22:%.*]] // CHECK11: omp_if.else6: @@ -11293,6 +11292,7 @@ // CHECK11: omp_if.else16: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 // CHECK11-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_IF_END18]] @@ -11303,7 +11303,7 @@ // CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK11: omp.inner.for.end21: // CHECK11-NEXT: br label [[OMP_IF_END22]] // CHECK11: omp_if.end22: @@ -11390,7 +11390,7 @@ // CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: @@ -11429,7 +11429,7 @@ // CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -11518,7 +11518,7 @@ // CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: @@ -11557,7 +11557,7 @@ // CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -11696,7 +11696,7 @@ // CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11774,7 +11774,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11809,7 +11809,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -11845,6 +11844,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11853,7 +11853,7 @@ // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11931,7 +11931,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11982,7 +11982,6 @@ // CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -12027,6 +12026,7 @@ // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK11-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK11-NEXT: br label [[OMP_IF_END]] @@ -12037,7 +12037,7 @@ // CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -12115,7 +12115,7 @@ // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -12220,7 +12220,7 @@ // CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12297,7 +12297,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12332,7 +12332,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -12368,6 +12367,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12376,7 +12376,7 @@ // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12454,7 +12454,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12591,7 +12591,7 @@ // CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12669,7 +12669,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12704,7 +12704,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -12740,6 +12739,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -12748,7 +12748,7 @@ // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12826,7 +12826,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12880,8 +12880,6 @@ // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTCAPTURE_EXPR__CASTED11:%.*]] = alloca i64, align 8 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR17:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -12936,6 +12934,7 @@ // CHECK12: omp_if.else: // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_IF_END]] @@ -12946,7 +12945,7 @@ // CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END22:%.*]] // CHECK12: omp_if.else6: @@ -12976,6 +12975,7 @@ // CHECK12: omp_if.else16: // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR17]], align 4 // CHECK12-NEXT: call void @.omp_outlined..10(i32* [[TMP27]], i32* [[DOTBOUND_ZERO_ADDR17]], i64 [[TMP21]], i64 [[TMP23]], i64 [[TMP25]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_IF_END18]] @@ -12986,7 +12986,7 @@ // CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK12-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK12: omp.inner.for.end21: // CHECK12-NEXT: br label [[OMP_IF_END22]] // CHECK12: omp_if.end22: @@ -13073,7 +13073,7 @@ // CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: @@ -13112,7 +13112,7 @@ // CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK12-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK12: omp.inner.for.end18: // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: @@ -13201,7 +13201,7 @@ // CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 // CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: @@ -13240,7 +13240,7 @@ // CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK12-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK12: omp.inner.for.end18: // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: @@ -13379,7 +13379,7 @@ // CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13457,7 +13457,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13492,7 +13492,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 @@ -13528,6 +13527,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..14(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -13536,7 +13536,7 @@ // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13614,7 +13614,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13665,7 +13665,6 @@ // CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK12-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK12-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 @@ -13710,6 +13709,7 @@ // CHECK12: omp_if.else: // CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK12-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK12-NEXT: call void @.omp_outlined..16(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] // CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) // CHECK12-NEXT: br label [[OMP_IF_END]] @@ -13720,7 +13720,7 @@ // CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13798,7 +13798,7 @@ // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13839,22 +13839,22 @@ // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -13864,23 +13864,23 @@ // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 -// CHECK13-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK13-NEXT: call void @_Z9gtid_testv() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13914,23 +13914,23 @@ // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK13-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK13-NEXT: call void @_Z3fn4v() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -13940,23 +13940,23 @@ // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16 -// CHECK13-NEXT: call void @_Z3fn5v(), !llvm.access.group !16 +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK13-NEXT: call void @_Z3fn5v() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -13970,23 +13970,23 @@ // CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK13: omp.inner.for.cond21: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK13: omp.inner.for.body23: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19 -// CHECK13-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 +// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK13-NEXT: call void @_Z3fn6v() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK13: omp.body.continue26: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK13: omp.inner.for.inc27: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK13: omp.inner.for.end29: // CHECK13-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14022,23 +14022,23 @@ // CHECK13-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK13-NEXT: call void @_Z3fn1v() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: store i32 100, i32* [[I]], align 4 @@ -14048,23 +14048,23 @@ // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK13: omp.inner.for.cond7: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK13: omp.inner.for.body9: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25 -// CHECK13-NEXT: call void @_Z3fn2v(), !llvm.access.group !25 +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK13-NEXT: call void @_Z3fn2v() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK13: omp.body.continue12: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK13: omp.inner.for.inc13: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK13: omp.inner.for.end15: // CHECK13-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14078,23 +14078,23 @@ // CHECK13-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK13: omp.inner.for.cond21: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK13-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK13-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK13: omp.inner.for.body23: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK13-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28 -// CHECK13-NEXT: call void @_Z3fn3v(), !llvm.access.group !28 +// CHECK13-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK13-NEXT: call void @_Z3fn3v() // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK13: omp.body.continue26: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK13: omp.inner.for.inc27: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK13: omp.inner.for.end29: // CHECK13-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14120,22 +14120,22 @@ // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -14145,23 +14145,23 @@ // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 -// CHECK14-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK14-NEXT: call void @_Z9gtid_testv() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14195,23 +14195,23 @@ // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK14-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK14-NEXT: call void @_Z3fn4v() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -14221,23 +14221,23 @@ // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !16 -// CHECK14-NEXT: call void @_Z3fn5v(), !llvm.access.group !16 +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK14-NEXT: call void @_Z3fn5v() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !16 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14251,23 +14251,23 @@ // CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK14: omp.inner.for.cond21: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK14: omp.inner.for.body23: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !19 -// CHECK14-NEXT: call void @_Z3fn6v(), !llvm.access.group !19 +// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK14-NEXT: call void @_Z3fn6v() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK14: omp.body.continue26: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK14: omp.inner.for.inc27: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !19 +// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK14: omp.inner.for.end29: // CHECK14-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14303,23 +14303,23 @@ // CHECK14-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK14-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK14-NEXT: call void @_Z3fn1v() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: store i32 100, i32* [[I]], align 4 @@ -14329,23 +14329,23 @@ // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK14: omp.inner.for.cond7: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK14: omp.inner.for.body9: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !25 -// CHECK14-NEXT: call void @_Z3fn2v(), !llvm.access.group !25 +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK14-NEXT: call void @_Z3fn2v() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK14: omp.body.continue12: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK14: omp.inner.for.inc13: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK14: omp.inner.for.end15: // CHECK14-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14359,23 +14359,23 @@ // CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK14: omp.inner.for.cond21: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK14-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK14-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK14: omp.inner.for.body23: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK14-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !28 -// CHECK14-NEXT: call void @_Z3fn3v(), !llvm.access.group !28 +// CHECK14-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK14-NEXT: call void @_Z3fn3v() // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK14: omp.body.continue26: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK14: omp.inner.for.inc27: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK14: omp.inner.for.end29: // CHECK14-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14401,22 +14401,22 @@ // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14426,23 +14426,23 @@ // CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK15: omp.inner.for.cond7: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK15: omp.inner.for.body9: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK15-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK15-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 -// CHECK15-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 +// CHECK15-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK15-NEXT: call void @_Z9gtid_testv() // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK15: omp.body.continue12: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK15: omp.inner.for.inc13: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK15-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK15-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK15: omp.inner.for.end15: // CHECK15-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14476,23 +14476,23 @@ // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK15-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK15-NEXT: call void @_Z3fn4v() // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14536,23 +14536,23 @@ // CHECK15: omp_if.then: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK15: omp.inner.for.cond22: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK15-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK15: omp.inner.for.body24: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK15-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !18 -// CHECK15-NEXT: call void @_Z3fn6v(), !llvm.access.group !18 +// CHECK15-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 +// CHECK15-NEXT: call void @_Z3fn6v() // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK15: omp.body.continue27: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK15: omp.inner.for.inc28: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK15: omp.inner.for.end30: // CHECK15-NEXT: br label [[OMP_IF_END:%.*]] @@ -14613,23 +14613,23 @@ // CHECK15-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK15-NEXT: call void @_Z3fn1v() // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: store i32 100, i32* [[I]], align 4 @@ -14669,23 +14669,23 @@ // CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK15: omp.inner.for.cond21: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK15-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK15-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK15: omp.inner.for.body23: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK15-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !26 -// CHECK15-NEXT: call void @_Z3fn3v(), !llvm.access.group !26 +// CHECK15-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK15-NEXT: call void @_Z3fn3v() // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK15: omp.body.continue26: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK15: omp.inner.for.inc27: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK15: omp.inner.for.end29: // CHECK15-NEXT: store i32 100, i32* [[I20]], align 4 @@ -14711,22 +14711,22 @@ // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14736,23 +14736,23 @@ // CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV5]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7:%.*]] // CHECK16: omp.inner.for.cond7: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4, !llvm.access.group !10 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB4]], align 4 // CHECK16-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY9:%.*]], label [[OMP_INNER_FOR_END15:%.*]] // CHECK16: omp.inner.for.body9: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK16-NEXT: [[MUL10:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK16-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4, !llvm.access.group !10 -// CHECK16-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !10 +// CHECK16-NEXT: store i32 [[ADD11]], i32* [[I6]], align 4 +// CHECK16-NEXT: call void @_Z9gtid_testv() // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE12:%.*]] // CHECK16: omp.body.continue12: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC13:%.*]] // CHECK16: omp.inner.for.inc13: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV5]], align 4 // CHECK16-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4, !llvm.access.group !10 +// CHECK16-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV5]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK16: omp.inner.for.end15: // CHECK16-NEXT: store i32 100, i32* [[I6]], align 4 @@ -14786,23 +14786,23 @@ // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK16-NEXT: call void @_Z3fn4v(), !llvm.access.group !13 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK16-NEXT: call void @_Z3fn4v() // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14846,23 +14846,23 @@ // CHECK16: omp_if.then: // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22:%.*]] // CHECK16: omp.inner.for.cond22: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK16-NEXT: [[CMP23:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK16-NEXT: br i1 [[CMP23]], label [[OMP_INNER_FOR_BODY24:%.*]], label [[OMP_INNER_FOR_END30:%.*]] // CHECK16: omp.inner.for.body24: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: [[MUL25:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK16-NEXT: [[ADD26:%.*]] = add nsw i32 0, [[MUL25]] -// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4, !llvm.access.group !18 -// CHECK16-NEXT: call void @_Z3fn6v(), !llvm.access.group !18 +// CHECK16-NEXT: store i32 [[ADD26]], i32* [[I20]], align 4 +// CHECK16-NEXT: call void @_Z3fn6v() // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE27:%.*]] // CHECK16: omp.body.continue27: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC28:%.*]] // CHECK16: omp.inner.for.inc28: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: store i32 [[ADD29]], i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND22]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK16: omp.inner.for.end30: // CHECK16-NEXT: br label [[OMP_IF_END:%.*]] @@ -14923,23 +14923,23 @@ // CHECK16-NEXT: store i32 [[TMP0]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], [[TMP2]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK16-NEXT: call void @_Z3fn1v(), !llvm.access.group !22 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK16-NEXT: call void @_Z3fn1v() // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: store i32 100, i32* [[I]], align 4 @@ -14979,23 +14979,23 @@ // CHECK16-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21:%.*]] // CHECK16: omp.inner.for.cond21: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB18]], align 4 // CHECK16-NEXT: [[CMP22:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK16-NEXT: br i1 [[CMP22]], label [[OMP_INNER_FOR_BODY23:%.*]], label [[OMP_INNER_FOR_END29:%.*]] // CHECK16: omp.inner.for.body23: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: [[MUL24:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK16-NEXT: [[ADD25:%.*]] = add nsw i32 0, [[MUL24]] -// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4, !llvm.access.group !26 -// CHECK16-NEXT: call void @_Z3fn3v(), !llvm.access.group !26 +// CHECK16-NEXT: store i32 [[ADD25]], i32* [[I20]], align 4 +// CHECK16-NEXT: call void @_Z3fn3v() // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE26:%.*]] // CHECK16: omp.body.continue26: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC27:%.*]] // CHECK16: omp.inner.for.inc27: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: store i32 [[ADD28]], i32* [[DOTOMP_IV19]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND21]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK16: omp.inner.for.end29: // CHECK16-NEXT: store i32 100, i32* [[I20]], align 4 diff --git a/clang/test/OpenMP/vla_crash.c b/clang/test/OpenMP/vla_crash.c --- a/clang/test/OpenMP/vla_crash.c +++ b/clang/test/OpenMP/vla_crash.c @@ -29,7 +29,6 @@ // CHECK1-NEXT: [[C:%.*]] = alloca i32***, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* @a, align 4 // CHECK1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 @@ -37,6 +36,7 @@ // CHECK1-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined.(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]], i32** [[B]], i64 [[TMP4]], i32**** [[C]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: ret void @@ -90,7 +90,6 @@ // CHECK1-NEXT: [[P:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTBOUND_ZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 // CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 @@ -100,6 +99,7 @@ // CHECK1-NEXT: store i32* [[TMP3]], i32** [[P]], align 8 // CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTBOUND_ZERO_ADDR]], align 4 // CHECK1-NEXT: call void @.omp_outlined..1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP2]], i32** [[P]], i32** [[A_ADDR]]) #[[ATTR2]] // CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) // CHECK1-NEXT: ret void @@ -130,4 +130,3 @@ // CHECK1: if.end: // CHECK1-NEXT: ret void // -// \ No newline at end of file