diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -1321,6 +1321,7 @@ // CHECK3-LABEL: define {{[^@]+}}@main // CHECK3-SAME: (i32 noundef [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK3-NEXT: entry: +// CHECK3-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i8*** }, align 8 // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 @@ -1350,7 +1351,11 @@ // CHECK3-NEXT: [[P_STRIDE30:%.*]] = alloca i32, align 4 // CHECK3-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK3: omp_parallel: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]]) +// CHECK3-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[ARGC_ADDR]], i32** [[GEP_ARGC_ADDR]], align 8 +// CHECK3-NEXT: [[GEP_ARGV_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 1 +// CHECK3-NEXT: store i8*** [[ARGV_ADDR]], i8**** [[GEP_ARGV_ADDR]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i8*** }*)* @main..omp_par to void (i32*, i32*, ...)*), { i32*, i8*** }* [[STRUCTARG]]) // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK3: omp.par.outlined.exit: // CHECK3-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -1553,58 +1558,62 @@ // // // CHECK3-LABEL: define {{[^@]+}}@main..omp_par -// CHECK3-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[ARGC_ADDR:%.*]], i8*** [[ARGV_ADDR:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i8*** }* [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: omp.par.entry: +// CHECK3-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[LOADGEP_ARGC_ADDR:%.*]] = load i32*, i32** [[GEP_ARGC_ADDR]], align 8 +// CHECK3-NEXT: [[GEP_ARGV_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[LOADGEP_ARGV_ADDR:%.*]] = load i8***, i8**** [[GEP_ARGV_ADDR]], align 8 // CHECK3-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 // CHECK3-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 // CHECK3-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK3: omp.par.region: -// CHECK3-NEXT: [[TMP1:%.*]] = load float, float* @flag, align 4 -// CHECK3-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00 -// CHECK3-NEXT: br i1 [[TOBOOL]], label [[TMP13:%.*]], label [[TMP2:%.*]] -// CHECK3: 2: -// CHECK3-NEXT: br label [[TMP3:%.*]] +// CHECK3-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 +// CHECK3-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 +// CHECK3-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] // CHECK3: 3: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[TMP4]] to i8 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0 -// CHECK3-NEXT: [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0 +// CHECK3-NEXT: br label [[TMP4:%.*]] +// CHECK3: 4: +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 +// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 // CHECK3-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] // CHECK3: .cncl5: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] // CHECK3: .cont: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP12]] to i32 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 +// CHECK3-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 +// CHECK3-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] // CHECK3-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK3-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK3: 13: +// CHECK3: 14: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK3-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) +// CHECK3-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 +// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] // CHECK3: .cncl: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] // CHECK3: .split: -// CHECK3-NEXT: br label [[TMP3]] +// CHECK3-NEXT: br label [[TMP4]] // CHECK3: omp.par.outlined.exit.exitStub: // CHECK3-NEXT: ret void // @@ -1948,6 +1957,7 @@ // CHECK4-LABEL: define {{[^@]+}}@main // CHECK4-SAME: (i32 noundef [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK4-NEXT: entry: +// CHECK4-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i8*** }, align 8 // CHECK4-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 @@ -1977,7 +1987,11 @@ // CHECK4-NEXT: [[P_STRIDE30:%.*]] = alloca i32, align 4 // CHECK4-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK4: omp_parallel: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]]) +// CHECK4-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 0 +// CHECK4-NEXT: store i32* [[ARGC_ADDR]], i32** [[GEP_ARGC_ADDR]], align 8 +// CHECK4-NEXT: [[GEP_ARGV_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 1 +// CHECK4-NEXT: store i8*** [[ARGV_ADDR]], i8**** [[GEP_ARGV_ADDR]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i8*** }*)* @main..omp_par to void (i32*, i32*, ...)*), { i32*, i8*** }* [[STRUCTARG]]) // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK4: omp.par.outlined.exit: // CHECK4-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -2180,58 +2194,62 @@ // // // CHECK4-LABEL: define {{[^@]+}}@main..omp_par -// CHECK4-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[ARGC_ADDR:%.*]], i8*** [[ARGV_ADDR:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK4-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i8*** }* [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK4-NEXT: omp.par.entry: +// CHECK4-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[LOADGEP_ARGC_ADDR:%.*]] = load i32*, i32** [[GEP_ARGC_ADDR]], align 8 +// CHECK4-NEXT: [[GEP_ARGV_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[LOADGEP_ARGV_ADDR:%.*]] = load i8***, i8**** [[GEP_ARGV_ADDR]], align 8 // CHECK4-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 // CHECK4-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: -// CHECK4-NEXT: [[TMP1:%.*]] = load float, float* @flag, align 4 -// CHECK4-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00 -// CHECK4-NEXT: br i1 [[TOBOOL]], label [[TMP13:%.*]], label [[TMP2:%.*]] -// CHECK4: 2: -// CHECK4-NEXT: br label [[TMP3:%.*]] +// CHECK4-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 +// CHECK4-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 +// CHECK4-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] // CHECK4: 3: -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[CONV:%.*]] = trunc i32 [[TMP4]] to i8 -// CHECK4-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0 -// CHECK4-NEXT: [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0 +// CHECK4-NEXT: br label [[TMP4:%.*]] +// CHECK4: 4: +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK4-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK4-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 +// CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 // CHECK4-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK4-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK4-NEXT: br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] +// CHECK4-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK4-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +// CHECK4-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] // CHECK4: .cncl5: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] // CHECK4: .cont: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0 -// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK4-NEXT: [[CONV8:%.*]] = sext i8 [[TMP12]] to i32 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP9]] +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 +// CHECK4-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 +// CHECK4-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 +// CHECK4-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] // CHECK4-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK4-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK4: 13: +// CHECK4: 14: // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK4-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0 -// CHECK4-NEXT: br i1 [[TMP15]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] +// CHECK4-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) +// CHECK4-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 +// CHECK4-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] // CHECK4: .cncl: // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK4-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] // CHECK4: .split: -// CHECK4-NEXT: br label [[TMP3]] +// CHECK4-NEXT: br label [[TMP4]] // CHECK4: omp.par.outlined.exit.exitStub: // CHECK4-NEXT: ret void // @@ -3815,6 +3833,7 @@ // CHECK9-LABEL: define {{[^@]+}}@main // CHECK9-SAME: (i32 noundef [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: +// CHECK9-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i8*** }, align 8 // CHECK9-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK9-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 @@ -3844,7 +3863,11 @@ // CHECK9-NEXT: [[P_STRIDE30:%.*]] = alloca i32, align 4 // CHECK9-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK9: omp_parallel: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]]) +// CHECK9-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 0 +// CHECK9-NEXT: store i32* [[ARGC_ADDR]], i32** [[GEP_ARGC_ADDR]], align 8 +// CHECK9-NEXT: [[GEP_ARGV_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 1 +// CHECK9-NEXT: store i8*** [[ARGV_ADDR]], i8**** [[GEP_ARGV_ADDR]], align 8 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i8*** }*)* @main..omp_par to void (i32*, i32*, ...)*), { i32*, i8*** }* [[STRUCTARG]]) // CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK9: omp.par.outlined.exit: // CHECK9-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4047,58 +4070,62 @@ // // // CHECK9-LABEL: define {{[^@]+}}@main..omp_par -// CHECK9-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[ARGC_ADDR:%.*]], i8*** [[ARGV_ADDR:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK9-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i8*** }* [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK9-NEXT: omp.par.entry: +// CHECK9-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[TMP0]], i32 0, i32 0 +// CHECK9-NEXT: [[LOADGEP_ARGC_ADDR:%.*]] = load i32*, i32** [[GEP_ARGC_ADDR]], align 8 +// CHECK9-NEXT: [[GEP_ARGV_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[TMP0]], i32 0, i32 1 +// CHECK9-NEXT: [[LOADGEP_ARGV_ADDR:%.*]] = load i8***, i8**** [[GEP_ARGV_ADDR]], align 8 // CHECK9-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -// CHECK9-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -// CHECK9-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 // CHECK9-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 // CHECK9-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK9: omp.par.region: -// CHECK9-NEXT: [[TMP1:%.*]] = load float, float* @flag, align 4 -// CHECK9-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00 -// CHECK9-NEXT: br i1 [[TOBOOL]], label [[TMP13:%.*]], label [[TMP2:%.*]] -// CHECK9: 2: -// CHECK9-NEXT: br label [[TMP3:%.*]] +// CHECK9-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 +// CHECK9-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 +// CHECK9-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] // CHECK9: 3: -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[TMP4]] to i8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0 -// CHECK9-NEXT: [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0 +// CHECK9-NEXT: br label [[TMP4:%.*]] +// CHECK9: 4: +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 +// CHECK9-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK9-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 // CHECK9-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK9-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK9-NEXT: br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] +// CHECK9-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK9-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +// CHECK9-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] // CHECK9: .cncl5: // CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] // CHECK9: .cont: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0 -// CHECK9-NEXT: [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK9-NEXT: [[CONV8:%.*]] = sext i8 [[TMP12]] to i32 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP9]] +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 +// CHECK9-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 +// CHECK9-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 +// CHECK9-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] // CHECK9-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK9-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK9-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK9: omp.par.pre_finalize: // CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK9: 13: +// CHECK9: 14: // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK9-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0 -// CHECK9-NEXT: br i1 [[TMP15]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] +// CHECK9-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) +// CHECK9-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 +// CHECK9-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] // CHECK9: .cncl: // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK9-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK9-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK9-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] // CHECK9: .split: -// CHECK9-NEXT: br label [[TMP3]] +// CHECK9-NEXT: br label [[TMP4]] // CHECK9: omp.par.outlined.exit.exitStub: // CHECK9-NEXT: ret void // @@ -4442,6 +4469,7 @@ // CHECK10-LABEL: define {{[^@]+}}@main // CHECK10-SAME: (i32 noundef [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK10-NEXT: entry: +// CHECK10-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i8*** }, align 8 // CHECK10-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK10-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 @@ -4471,7 +4499,11 @@ // CHECK10-NEXT: [[P_STRIDE30:%.*]] = alloca i32, align 4 // CHECK10-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK10: omp_parallel: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]]) +// CHECK10-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 0 +// CHECK10-NEXT: store i32* [[ARGC_ADDR]], i32** [[GEP_ARGC_ADDR]], align 8 +// CHECK10-NEXT: [[GEP_ARGV_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 1 +// CHECK10-NEXT: store i8*** [[ARGV_ADDR]], i8**** [[GEP_ARGV_ADDR]], align 8 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i8*** }*)* @main..omp_par to void (i32*, i32*, ...)*), { i32*, i8*** }* [[STRUCTARG]]) // CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK10: omp.par.outlined.exit: // CHECK10-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4674,58 +4706,62 @@ // // // CHECK10-LABEL: define {{[^@]+}}@main..omp_par -// CHECK10-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[ARGC_ADDR:%.*]], i8*** [[ARGV_ADDR:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK10-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i8*** }* [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK10-NEXT: omp.par.entry: +// CHECK10-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[TMP0]], i32 0, i32 0 +// CHECK10-NEXT: [[LOADGEP_ARGC_ADDR:%.*]] = load i32*, i32** [[GEP_ARGC_ADDR]], align 8 +// CHECK10-NEXT: [[GEP_ARGV_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[TMP0]], i32 0, i32 1 +// CHECK10-NEXT: [[LOADGEP_ARGV_ADDR:%.*]] = load i8***, i8**** [[GEP_ARGV_ADDR]], align 8 // CHECK10-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -// CHECK10-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -// CHECK10-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +// CHECK10-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +// CHECK10-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 // CHECK10-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 // CHECK10-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK10: omp.par.region: -// CHECK10-NEXT: [[TMP1:%.*]] = load float, float* @flag, align 4 -// CHECK10-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00 -// CHECK10-NEXT: br i1 [[TOBOOL]], label [[TMP13:%.*]], label [[TMP2:%.*]] -// CHECK10: 2: -// CHECK10-NEXT: br label [[TMP3:%.*]] +// CHECK10-NEXT: [[TMP2:%.*]] = load float, float* @flag, align 4 +// CHECK10-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00 +// CHECK10-NEXT: br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]] // CHECK10: 3: -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK10-NEXT: [[CONV:%.*]] = trunc i32 [[TMP4]] to i8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0 -// CHECK10-NEXT: [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 -// CHECK10-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0 +// CHECK10-NEXT: br label [[TMP4:%.*]] +// CHECK10: 4: +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK10-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0 +// CHECK10-NEXT: [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8 +// CHECK10-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 // CHECK10-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1 // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) -// CHECK10-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0 -// CHECK10-NEXT: br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] +// CHECK10-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) +// CHECK10-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0 +// CHECK10-NEXT: br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]] // CHECK10: .cncl5: // CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] // CHECK10: .cont: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK10-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 -// CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0 -// CHECK10-NEXT: [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 -// CHECK10-NEXT: [[CONV8:%.*]] = sext i8 [[TMP12]] to i32 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP9]] +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8 +// CHECK10-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0 +// CHECK10-NEXT: [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8 +// CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0 +// CHECK10-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1 +// CHECK10-NEXT: [[CONV8:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] // CHECK10-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD]] to i8 // CHECK10-NEXT: store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1 // CHECK10-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK10: omp.par.pre_finalize: // CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] -// CHECK10: 13: +// CHECK10: 14: // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) -// CHECK10-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0 -// CHECK10-NEXT: br i1 [[TMP15]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] +// CHECK10-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1) +// CHECK10-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0 +// CHECK10-NEXT: br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]] // CHECK10: .cncl: // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK10-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) +// CHECK10-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) // CHECK10-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]] // CHECK10: .split: -// CHECK10-NEXT: br label [[TMP3]] +// CHECK10-NEXT: br label [[TMP4]] // CHECK10: omp.par.outlined.exit.exitStub: // CHECK10-NEXT: ret void // diff --git a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c --- a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c +++ b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c @@ -33,6 +33,8 @@ // ALL-LABEL: @_Z17nested_parallel_1Pfid( // ALL-NEXT: entry: +// ALL-NEXT: [[STRUCTARG14:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float** }, align 8 +// ALL-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8 // ALL-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // ALL-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 @@ -42,7 +44,15 @@ // ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // ALL-NEXT: br label [[OMP_PARALLEL:%.*]] // ALL: omp_parallel: -// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]) +// ALL-NEXT: [[GEP_STRUCTARG:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 0 +// ALL-NEXT: store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG]], align 8 +// ALL-NEXT: [[GEP_A_ADDR15:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 1 +// ALL-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR15]], align 8 +// ALL-NEXT: [[GEP_B_ADDR16:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 2 +// ALL-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR16]], align 8 +// ALL-NEXT: [[GEP_R_ADDR17:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 3 +// ALL-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR17]], align 8 +// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { i32*, double*, float** }*, i32*, double*, float** }*)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]]) // ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT13:%.*]] // ALL: omp.par.outlined.exit13: // ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -61,6 +71,10 @@ // ALL-LABEL: @_Z17nested_parallel_2Pfid( // ALL-NEXT: entry: +// ALL-NEXT: [[STRUCTARG68:%.*]] = alloca { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, align 8 +// ALL-NEXT: [[STRUCTARG64:%.*]] = alloca { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }, align 8 +// ALL-NEXT: [[STRUCTARG59:%.*]] = alloca { i32*, double*, float** }, align 8 +// ALL-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8 // ALL-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // ALL-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 @@ -70,7 +84,19 @@ // ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // ALL-NEXT: br label [[OMP_PARALLEL:%.*]] // ALL: omp_parallel: -// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]) +// ALL-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 0 +// ALL-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +// ALL-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 1 +// ALL-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR]], align 8 +// ALL-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 2 +// ALL-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8 +// ALL-NEXT: [[GEP_STRUCTARG64:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 3 +// ALL-NEXT: store { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG64]], { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }** [[GEP_STRUCTARG64]], align 8 +// ALL-NEXT: [[GEP_STRUCTARG69:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 4 +// ALL-NEXT: store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG69]], align 8 +// ALL-NEXT: [[GEP_STRUCTARG5970:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 5 +// ALL-NEXT: store { i32*, double*, float** }* [[STRUCTARG59]], { i32*, double*, float** }** [[GEP_STRUCTARG5970]], align 8 +// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }*)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]]) // ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT55:%.*]] // ALL: omp.par.outlined.exit55: // ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] diff --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c b/clang/test/OpenMP/irbuilder_nested_parallel_for.c --- a/clang/test/OpenMP/irbuilder_nested_parallel_for.c +++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c @@ -23,15 +23,15 @@ // // CHECK-DEBUG-LABEL: @_Z14parallel_for_0v( // CHECK-DEBUG-NEXT: entry: -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG12:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG13:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: -// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z14parallel_for_0v..omp_par to void (i32*, i32*, ...)*)), !dbg [[DBG13:![0-9]+]] +// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z14parallel_for_0v..omp_par to void (i32*, i32*, ...)*)), !dbg [[DBG14:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK-DEBUG: omp.par.outlined.exit: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG17:![0-9]+]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG18:![0-9]+]] // void parallel_for_0(void) { #pragma omp parallel @@ -44,6 +44,8 @@ // CHECK-LABEL: @_Z14parallel_for_1Pfid( // CHECK-NEXT: entry: +// CHECK-NEXT: [[STRUCTARG17:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float** }, align 8 +// CHECK-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8 // CHECK-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 @@ -53,7 +55,15 @@ // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK: omp_parallel: -// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]) +// CHECK-NEXT: [[GEP_STRUCTARG:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0 +// CHECK-NEXT: store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG]], align 8 +// CHECK-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 1 +// CHECK-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR18]], align 8 +// CHECK-NEXT: [[GEP_B_ADDR19:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 2 +// CHECK-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR19]], align 8 +// CHECK-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 3 +// CHECK-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR20]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { i32*, double*, float** }*, i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]]) // CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]] // CHECK: omp.par.outlined.exit16: // CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -62,24 +72,34 @@ // // CHECK-DEBUG-LABEL: @_Z14parallel_for_1Pfid( // CHECK-DEBUG-NEXT: entry: +// CHECK-DEBUG-NEXT: [[STRUCTARG17:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float** }, align 8 +// CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8 // CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 // CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG72:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73:![0-9]+]] // CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG74:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META74:![0-9]+]], metadata !DIExpression()), !dbg [[DBG75:![0-9]+]] // CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]), !dbg [[DBG77:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]), !dbg [[DBG78:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: -// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB6]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]), !dbg [[DBG78:![0-9]+]] +// CHECK-DEBUG-NEXT: [[GEP_STRUCTARG:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0 +// CHECK-DEBUG-NEXT: store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 1 +// CHECK-DEBUG-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR18]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_B_ADDR19:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 2 +// CHECK-DEBUG-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR19]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 3 +// CHECK-DEBUG-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR20]], align 8 +// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB6]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { i32*, double*, float** }*, i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]]), !dbg [[DBG79:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]] // CHECK-DEBUG: omp.par.outlined.exit16: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG80:![0-9]+]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG81:![0-9]+]] // void parallel_for_1(float *r, int a, double b) { #pragma omp parallel @@ -96,6 +116,10 @@ // CHECK-LABEL: @_Z14parallel_for_2Pfid( // CHECK-NEXT: entry: +// CHECK-NEXT: [[STRUCTARG218:%.*]] = alloca { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, align 8 +// CHECK-NEXT: [[STRUCTARG214:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, align 8 +// CHECK-NEXT: [[STRUCTARG209:%.*]] = alloca { i32*, double*, float** }, align 8 +// CHECK-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8 // CHECK-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 @@ -113,7 +137,19 @@ // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK: omp_parallel: -// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]) +// CHECK-NEXT: [[GEP_STRUCTARG214:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 0 +// CHECK-NEXT: store { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG214]], { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }** [[GEP_STRUCTARG214]], align 8 +// CHECK-NEXT: [[GEP_STRUCTARG219:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 1 +// CHECK-NEXT: store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG219]], align 8 +// CHECK-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 2 +// CHECK-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +// CHECK-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 3 +// CHECK-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR]], align 8 +// CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 4 +// CHECK-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8 +// CHECK-NEXT: [[GEP_STRUCTARG209220:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 5 +// CHECK-NEXT: store { i32*, double*, float** }* [[STRUCTARG209]], { i32*, double*, float** }** [[GEP_STRUCTARG209220]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]]) // CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]] // CHECK: omp.par.outlined.exit184: // CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -169,6 +205,10 @@ // // CHECK-DEBUG-LABEL: @_Z14parallel_for_2Pfid( // CHECK-DEBUG-NEXT: entry: +// CHECK-DEBUG-NEXT: [[STRUCTARG218:%.*]] = alloca { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, align 8 +// CHECK-DEBUG-NEXT: [[STRUCTARG214:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, align 8 +// CHECK-DEBUG-NEXT: [[STRUCTARG209:%.*]] = alloca { i32*, double*, float** }, align 8 +// CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8 // CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 @@ -181,68 +221,80 @@ // CHECK-DEBUG-NEXT: [[P_UPPERBOUND205:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[P_STRIDE206:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]] // CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META134:![0-9]+]], metadata !DIExpression()), !dbg [[DBG135:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136:![0-9]+]] // CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META136:![0-9]+]], metadata !DIExpression()), !dbg [[DBG137:![0-9]+]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB13:[0-9]+]]), !dbg [[DBG138:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB13:[0-9]+]]), !dbg [[DBG139:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: -// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB13]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]), !dbg [[DBG139:![0-9]+]] +// CHECK-DEBUG-NEXT: [[GEP_STRUCTARG214:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 0 +// CHECK-DEBUG-NEXT: store { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG214]], { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }** [[GEP_STRUCTARG214]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_STRUCTARG219:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 1 +// CHECK-DEBUG-NEXT: store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG219]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 2 +// CHECK-DEBUG-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 3 +// CHECK-DEBUG-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 4 +// CHECK-DEBUG-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8 +// CHECK-DEBUG-NEXT: [[GEP_STRUCTARG209220:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 5 +// CHECK-DEBUG-NEXT: store { i32*, double*, float** }* [[STRUCTARG209]], { i32*, double*, float** }** [[GEP_STRUCTARG209220]], align 8 +// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB13]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]]), !dbg [[DBG140:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]] // CHECK-DEBUG: omp.par.outlined.exit184: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[I185]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG146:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 0, i32* [[I185]], align 4, !dbg [[DBG146]] -// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG147:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32* [[I185]], i32** [[TMP0]], align 8, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, i32* [[I185]], align 4, !dbg [[DBG148:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* [[AGG_CAPTURED186]]), !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG147]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[I185]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 0, i32* [[I185]], align 4, !dbg [[DBG147]] +// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG148:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32* [[I185]], i32** [[TMP0]], align 8, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, i32* [[I185]], align 4, !dbg [[DBG149:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* [[AGG_CAPTURED186]]), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.preheader190: -// CHECK-DEBUG-NEXT: store i32 0, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: store i32 1, i32* [[P_STRIDE206]], align 4, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42:[0-9]+]]), !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 1), !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG147]] +// CHECK-DEBUG-NEXT: store i32 0, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: store i32 1, i32* [[P_STRIDE206]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42:[0-9]+]]), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 1), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.header191: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG147]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.cond192: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG147]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.body193: -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]]), !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG149:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg [[DBG150:![0-9]+]] -// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG151:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg [[DBG152:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV202]], float* [[TMP11]], align 4, !dbg [[DBG153:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG147]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]]), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG150:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG150]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg [[DBG151:![0-9]+]] +// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG152:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG150]] +// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV202]], float* [[TMP11]], align 4, !dbg [[DBG154:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.inc194: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG147]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.exit195: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG147]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42]]), !dbg [[DBG150]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG150]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG147]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42]]), !dbg [[DBG151]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG151]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.after196: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG154:![0-9]+]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG155:![0-9]+]] // void parallel_for_2(float *r, int a, double b) { #pragma omp parallel diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp --- a/clang/test/OpenMP/parallel_codegen.cpp +++ b/clang/test/OpenMP/parallel_codegen.cpp @@ -311,70 +311,70 @@ // CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]] // CHECK2-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i8*** [[ARGV_ADDR]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4, !dbg [[DBG21:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG22:![0-9]+]] -// CHECK2-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG22]] -// CHECK2-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG22]] -// CHECK2-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16, !dbg [[DBG22]] -// CHECK2-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG22]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[__VLA_EXPR0]], metadata [[META23:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32* [[VLA]], metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG30:![0-9]+]] -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i32* [[VLA]]), !dbg [[DBG31:![0-9]+]] -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP1]]), !dbg [[DBG32:![0-9]+]] -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB9:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP1]], i32* [[VLA]]), !dbg [[DBG33:![0-9]+]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8, !dbg [[DBG34:![0-9]+]] -// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(i8** noundef [[TMP3]]), !dbg [[DBG35:![0-9]+]] -// CHECK2-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4, !dbg [[DBG36:![0-9]+]] -// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG37:![0-9]+]] -// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP4]]), !dbg [[DBG37]] -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[RETVAL]], align 4, !dbg [[DBG37]] -// CHECK2-NEXT: ret i32 [[TMP5]], !dbg [[DBG37]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i8*** [[ARGV_ADDR]], metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG21:![0-9]+]] +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4, !dbg [[DBG22:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG23:![0-9]+]] +// CHECK2-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG23]] +// CHECK2-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG23]] +// CHECK2-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16, !dbg [[DBG23]] +// CHECK2-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG23]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[__VLA_EXPR0]], metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32* [[VLA]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31:![0-9]+]] +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i32* [[VLA]]), !dbg [[DBG32:![0-9]+]] +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP1]]), !dbg [[DBG33:![0-9]+]] +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB9:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP1]], i32* [[VLA]]), !dbg [[DBG34:![0-9]+]] +// CHECK2-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8, !dbg [[DBG35:![0-9]+]] +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(i8** noundef [[TMP3]]), !dbg [[DBG36:![0-9]+]] +// CHECK2-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4, !dbg [[DBG37:![0-9]+]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG38:![0-9]+]] +// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP4]]), !dbg [[DBG38]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[RETVAL]], align 4, !dbg [[DBG38]] +// CHECK2-NEXT: ret i32 [[TMP5]], !dbg [[DBG38]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__ -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG38:![0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG39:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META46:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META47:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META48:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] // CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] // CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG52:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG52]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG53:![0-9]+]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG53]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META51:![0-9]+]], metadata !DIExpression()), !dbg [[DBG52:![0-9]+]] +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG53:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG53]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG54:![0-9]+]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG54]] // CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG52]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG53]] // CHECK2: invoke.cont: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG54:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG55:![0-9]+]] -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG56:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG54]] +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG55:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG56:![0-9]+]] +// CHECK2-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG57:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG55]] // CHECK2: terminate.lpad: // CHECK2-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 } -// CHECK2-NEXT: catch i8* null, !dbg [[DBG52]] -// CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG52]] -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP5]]) #[[ATTR7:[0-9]+]], !dbg [[DBG52]] -// CHECK2-NEXT: unreachable, !dbg [[DBG52]] +// CHECK2-NEXT: catch i8* null, !dbg [[DBG53]] +// CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG53]] +// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP5]]) #[[ATTR7:[0-9]+]], !dbg [[DBG53]] +// CHECK2-NEXT: unreachable, !dbg [[DBG53]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z3fooIiEvT_ -// CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat !dbg [[DBG57:![0-9]+]] { +// CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat !dbg [[DBG58:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META62:![0-9]+]], metadata !DIExpression()), !dbg [[DBG63:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG64:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG64:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG65:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@__clang_call_terminate @@ -385,31 +385,31 @@ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG65:![0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG66:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]] // CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]] // CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG71:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG71]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG71]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG71]] -// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG71]] -// CHECK2-NEXT: call void @.omp_outlined._debug__(i32* [[TMP2]], i32* [[TMP3]], i64 [[TMP0]], i32* [[TMP4]]) #[[ATTR6]], !dbg [[DBG71]] -// CHECK2-NEXT: ret void, !dbg [[DBG71]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]] +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG72:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG72]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG72]] +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG72]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG72]] +// CHECK2-NEXT: call void @.omp_outlined._debug__(i32* [[TMP2]], i32* [[TMP3]], i64 [[TMP0]], i32* [[TMP4]]) #[[ATTR6]], !dbg [[DBG72]] +// CHECK2-NEXT: ret void, !dbg [[DBG72]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.1 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG74:![0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG75:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -418,27 +418,27 @@ // CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 // CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META78:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] // CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG81:![0-9]+]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32* [[GLOBAL]], metadata [[META82:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] -// CHECK2-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG81]] -// CHECK2-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG81]] -// CHECK2-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP0]], align 16, !dbg [[DBG81]] -// CHECK2-NEXT: store i64 [[TMP0]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG81]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[__VLA_EXPR0]], metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32* [[VLA1]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP0]], i32* [[VLA1]], i32* [[GLOBAL]]), !dbg [[DBG81]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG85:![0-9]+]] -// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP2]]), !dbg [[DBG85]] -// CHECK2-NEXT: ret void, !dbg [[DBG87:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META81:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG82:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32* [[GLOBAL]], metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] +// CHECK2-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG82]] +// CHECK2-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG82]] +// CHECK2-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP0]], align 16, !dbg [[DBG82]] +// CHECK2-NEXT: store i64 [[TMP0]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG82]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[__VLA_EXPR0]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32* [[VLA1]], metadata [[META85:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP0]], i32* [[VLA1]], i32* [[GLOBAL]]), !dbg [[DBG82]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG86:![0-9]+]] +// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP2]]), !dbg [[DBG86]] +// CHECK2-NEXT: ret void, !dbg [[DBG88:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.2 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG88:![0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG89:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -446,37 +446,37 @@ // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META91:![0-9]+]], metadata !DIExpression()), !dbg [[DBG92:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META92:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META93:![0-9]+]], metadata !DIExpression()), !dbg [[DBG92]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META94:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93]] // CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META94:![0-9]+]], metadata !DIExpression()), !dbg [[DBG92]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META95:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93]] // CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META95:![0-9]+]], metadata !DIExpression()), !dbg [[DBG96:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META96:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97:![0-9]+]] // CHECK2-NEXT: store i32* [[GLOBAL]], i32** [[GLOBAL_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[GLOBAL_ADDR]], metadata [[META97:![0-9]+]], metadata !DIExpression()), !dbg [[DBG98:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG99:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG99]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[GLOBAL_ADDR]], align 8, !dbg [[DBG99]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG100:![0-9]+]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG100]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[GLOBAL_ADDR]], metadata [[META98:![0-9]+]], metadata !DIExpression()), !dbg [[DBG99:![0-9]+]] +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG100:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[GLOBAL_ADDR]], align 8, !dbg [[DBG100]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG101:![0-9]+]] +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG101]] // CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP3]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG99]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG100]] // CHECK2: invoke.cont: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4, !dbg [[DBG101:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG102:![0-9]+]] -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG103:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG101]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4, !dbg [[DBG102:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG103:![0-9]+]] +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG104:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG102]] // CHECK2: terminate.lpad: // CHECK2-NEXT: [[TMP5:%.*]] = landingpad { i8*, i32 } -// CHECK2-NEXT: catch i8* null, !dbg [[DBG99]] -// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG99]] -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP6]]) #[[ATTR7]], !dbg [[DBG99]] -// CHECK2-NEXT: unreachable, !dbg [[DBG99]] +// CHECK2-NEXT: catch i8* null, !dbg [[DBG100]] +// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG100]] +// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP6]]) #[[ATTR7]], !dbg [[DBG100]] +// CHECK2-NEXT: unreachable, !dbg [[DBG100]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR3]] !dbg [[DBG104:![0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR3]] !dbg [[DBG105:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -484,166 +484,166 @@ // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[GLOBAL_ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META105:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META106:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META107:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META108:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] // CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META108:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META109:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] // CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META109:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META110:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] // CHECK2-NEXT: store i32* [[GLOBAL]], i32** [[GLOBAL_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[GLOBAL_ADDR]], metadata [[META110:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG111:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG111]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[GLOBAL_ADDR]], align 8, !dbg [[DBG111]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG111]] -// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG111]] -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG111]] -// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[GLOBAL_ADDR]], align 8, !dbg [[DBG111]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.2(i32* [[TMP3]], i32* [[TMP4]], i64 [[TMP0]], i32* [[TMP5]], i32* [[TMP6]]) #[[ATTR6]], !dbg [[DBG111]] -// CHECK2-NEXT: ret void, !dbg [[DBG111]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[GLOBAL_ADDR]], metadata [[META111:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG112:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG112]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[GLOBAL_ADDR]], align 8, !dbg [[DBG112]] +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG112]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG112]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG112]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[GLOBAL_ADDR]], align 8, !dbg [[DBG112]] +// CHECK2-NEXT: call void @.omp_outlined._debug__.2(i32* [[TMP3]], i32* [[TMP4]], i64 [[TMP0]], i32* [[TMP5]], i32* [[TMP6]]) #[[ATTR6]], !dbg [[DBG112]] +// CHECK2-NEXT: ret void, !dbg [[DBG112]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG112:![0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG113:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META113:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META114:![0-9]+]], metadata !DIExpression()), !dbg [[DBG115:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG115]] // CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG117:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG117]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG117]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.1(i32* [[TMP1]], i32* [[TMP2]], i64 [[TMP0]]) #[[ATTR6]], !dbg [[DBG117]] -// CHECK2-NEXT: ret void, !dbg [[DBG117]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG115]] +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG118:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG118]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG118]] +// CHECK2-NEXT: call void @.omp_outlined._debug__.1(i32* [[TMP1]], i32* [[TMP2]], i64 [[TMP0]]) #[[ATTR6]], !dbg [[DBG118]] +// CHECK2-NEXT: ret void, !dbg [[DBG118]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.5 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG118:![0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG119:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META119:![0-9]+]], metadata !DIExpression()), !dbg [[DBG120:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META121:![0-9]+]], metadata !DIExpression()), !dbg [[DBG120]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121]] // CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG120]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121]] // CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG124:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG125:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG125]] -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB7:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP0]], i32* [[TMP1]]), !dbg [[DBG125]] -// CHECK2-NEXT: ret void, !dbg [[DBG126:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META124:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125:![0-9]+]] +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG126:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG126]] +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB7:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP0]], i32* [[TMP1]]), !dbg [[DBG126]] +// CHECK2-NEXT: ret void, !dbg [[DBG127:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.6 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG127:![0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG128:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META128:![0-9]+]], metadata !DIExpression()), !dbg [[DBG129:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META130:![0-9]+]], metadata !DIExpression()), !dbg [[DBG129]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] // CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG129]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] // CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG134:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG134]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG135:![0-9]+]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG135]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]] +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG135:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG135]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG136:![0-9]+]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG136]] // CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG134]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG135]] // CHECK2: invoke.cont: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG136:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG137:![0-9]+]] -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG138:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG136]] +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG137:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG138:![0-9]+]] +// CHECK2-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG139:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG137]] // CHECK2: terminate.lpad: // CHECK2-NEXT: [[TMP4:%.*]] = landingpad { i8*, i32 } -// CHECK2-NEXT: catch i8* null, !dbg [[DBG134]] -// CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG134]] -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP5]]) #[[ATTR7]], !dbg [[DBG134]] -// CHECK2-NEXT: unreachable, !dbg [[DBG134]] +// CHECK2-NEXT: catch i8* null, !dbg [[DBG135]] +// CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG135]] +// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP5]]) #[[ATTR7]], !dbg [[DBG135]] +// CHECK2-NEXT: unreachable, !dbg [[DBG135]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG139:![0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG140:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META140:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META141:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142]] // CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142]] // CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG145:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG145]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG145]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG145]] -// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG145]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.6(i32* [[TMP2]], i32* [[TMP3]], i64 [[TMP0]], i32* [[TMP4]]) #[[ATTR6]], !dbg [[DBG145]] -// CHECK2-NEXT: ret void, !dbg [[DBG145]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142]] +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG146:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG146]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG146]] +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG146]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG146]] +// CHECK2-NEXT: call void @.omp_outlined._debug__.6(i32* [[TMP2]], i32* [[TMP3]], i64 [[TMP0]], i32* [[TMP4]]) #[[ATTR6]], !dbg [[DBG146]] +// CHECK2-NEXT: ret void, !dbg [[DBG146]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG146:![0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG147:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META147:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META149:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META150:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149]] // CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META150:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META151:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149]] // CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META151:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG152:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG152]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG152]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG152]] -// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG152]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.5(i32* [[TMP2]], i32* [[TMP3]], i64 [[TMP0]], i32* [[TMP4]]) #[[ATTR6]], !dbg [[DBG152]] -// CHECK2-NEXT: ret void, !dbg [[DBG152]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META152:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149]] +// CHECK2-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG153]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG153]] +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG153]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG153]] +// CHECK2-NEXT: call void @.omp_outlined._debug__.5(i32* [[TMP2]], i32* [[TMP3]], i64 [[TMP0]], i32* [[TMP4]]) #[[ATTR6]], !dbg [[DBG153]] +// CHECK2-NEXT: ret void, !dbg [[DBG153]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_ -// CHECK2-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG153:![0-9]+]] { +// CHECK2-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG154:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 // CHECK2-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META158:![0-9]+]], metadata !DIExpression()), !dbg [[DBG159:![0-9]+]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8, !dbg [[DBG160:![0-9]+]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP0]], i64 0, !dbg [[DBG160]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8, !dbg [[DBG160]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 0, !dbg [[DBG160]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1, !dbg [[DBG160]] -// CHECK2-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG161:![0-9]+]] -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB11:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 [[TMP3]]), !dbg [[DBG162:![0-9]+]] -// CHECK2-NEXT: ret i32 0, !dbg [[DBG163:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META159:![0-9]+]], metadata !DIExpression()), !dbg [[DBG160:![0-9]+]] +// CHECK2-NEXT: [[TMP0:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8, !dbg [[DBG161:![0-9]+]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP0]], i64 0, !dbg [[DBG161]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8, !dbg [[DBG161]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 0, !dbg [[DBG161]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1, !dbg [[DBG161]] +// CHECK2-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG162:![0-9]+]] +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB11:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 [[TMP3]]), !dbg [[DBG163:![0-9]+]] +// CHECK2-NEXT: ret i32 0, !dbg [[DBG164:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.9 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG164:![0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG165:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -651,69 +651,70 @@ // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[VAR:%.*]] = alloca double*, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META168:![0-9]+]], metadata !DIExpression()), !dbg [[DBG169:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG169]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] // CHECK2-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i8**** [[ARGC_ADDR]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i8**** [[ARGC_ADDR]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173:![0-9]+]] // CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META173:![0-9]+]], metadata !DIExpression()), !dbg [[DBG169]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8, !dbg [[DBG174:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG174]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[TMP0]], align 8, !dbg [[DBG175:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META174:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] +// CHECK2-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8, !dbg [[DBG175:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG175]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[TMP0]], align 8, !dbg [[DBG176:![0-9]+]] // CHECK2-NEXT: invoke void @_Z3fooIPPcEvT_(i8** noundef [[TMP2]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG177:![0-9]+]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG178:![0-9]+]] // CHECK2: invoke.cont: -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata double** [[VAR]], metadata [[META178:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185:![0-9]+]] -// CHECK2-NEXT: [[TMP3:%.*]] = load double*, double** [[VAR]], align 8, !dbg [[DBG186:![0-9]+]] -// CHECK2-NEXT: [[TMP4:%.*]] = mul nsw i64 0, [[TMP1]], !dbg [[DBG186]] -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 [[TMP4]], !dbg [[DBG186]] -// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX]], i64 0, !dbg [[DBG186]] -// CHECK2-NEXT: ret void, !dbg [[DBG187:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata double** [[VAR]], metadata [[META179:![0-9]+]], metadata !DIExpression()), !dbg [[DBG186:![0-9]+]] +// CHECK2-NEXT: [[TMP3:%.*]] = load double*, double** [[VAR]], align 8, !dbg [[DBG187:![0-9]+]] +// CHECK2-NEXT: [[TMP4:%.*]] = mul nsw i64 0, [[TMP1]], !dbg [[DBG187]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 [[TMP4]], !dbg [[DBG187]] +// CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX]], i64 0, !dbg [[DBG187]] +// CHECK2-NEXT: ret void, !dbg [[DBG188:![0-9]+]] // CHECK2: terminate.lpad: // CHECK2-NEXT: [[TMP5:%.*]] = landingpad { i8*, i32 } -// CHECK2-NEXT: catch i8* null, !dbg [[DBG177]] -// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG177]] -// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP6]]) #[[ATTR7]], !dbg [[DBG177]] -// CHECK2-NEXT: unreachable, !dbg [[DBG177]] +// CHECK2-NEXT: catch i8* null, !dbg [[DBG178]] +// CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG178]] +// CHECK2-NEXT: call void @__clang_call_terminate(i8* [[TMP6]]) #[[ATTR7]], !dbg [[DBG178]] +// CHECK2-NEXT: unreachable, !dbg [[DBG178]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z3fooIPPcEvT_ -// CHECK2-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG188:![0-9]+]] { +// CHECK2-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG189:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 // CHECK2-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META191:![0-9]+]], metadata !DIExpression()), !dbg [[DBG192:![0-9]+]] -// CHECK2-NEXT: ret void, !dbg [[DBG193:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META192:![0-9]+]], metadata !DIExpression()), !dbg [[DBG193:![0-9]+]] +// CHECK2-NEXT: ret void, !dbg [[DBG194:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..10 -// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG194:![0-9]+]] { +// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG195:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META195:![0-9]+]], metadata !DIExpression()), !dbg [[DBG196:![0-9]+]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197:![0-9]+]] // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META197:![0-9]+]], metadata !DIExpression()), !dbg [[DBG196]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META198:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197]] // CHECK2-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i8**** [[ARGC_ADDR]], metadata [[META198:![0-9]+]], metadata !DIExpression()), !dbg [[DBG196]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i8**** [[ARGC_ADDR]], metadata [[META199:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197]] // CHECK2-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META199:![0-9]+]], metadata !DIExpression()), !dbg [[DBG196]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8, !dbg [[DBG200:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG200]] -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG200]] -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG200]] -// CHECK2-NEXT: [[TMP4:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8, !dbg [[DBG200]] -// CHECK2-NEXT: call void @.omp_outlined._debug__.9(i32* [[TMP2]], i32* [[TMP3]], i8*** [[TMP4]], i64 [[TMP1]]) #[[ATTR6]], !dbg [[DBG200]] -// CHECK2-NEXT: ret void, !dbg [[DBG200]] +// CHECK2-NEXT: call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197]] +// CHECK2-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8, !dbg [[DBG201:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG201]] +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG201]] +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG201]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8, !dbg [[DBG201]] +// CHECK2-NEXT: call void @.omp_outlined._debug__.9(i32* [[TMP2]], i32* [[TMP3]], i8*** [[TMP4]], i64 [[TMP1]]) #[[ATTR6]], !dbg [[DBG201]] +// CHECK2-NEXT: ret void, !dbg [[DBG201]] // // // CHECK3-LABEL: define {{[^@]+}}@main // CHECK3-SAME: (i32 noundef [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK3-NEXT: entry: +// CHECK3-NEXT: [[STRUCTARG:%.*]] = alloca { i32* }, align 8 // CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 @@ -731,7 +732,9 @@ // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) // CHECK3-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK3: omp_parallel: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[VLA]]) +// CHECK3-NEXT: [[GEP_VLA:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0 +// CHECK3-NEXT: store i32* [[VLA]], i32** [[GEP_VLA]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @main..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]]) // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK3: omp.par.outlined.exit: // CHECK3-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -746,20 +749,22 @@ // // // CHECK3-LABEL: define {{[^@]+}}@main..omp_par -// CHECK3-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[VLA:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK3-NEXT: omp.par.entry: +// CHECK3-NEXT: [[GEP_VLA:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[LOADGEP_VLA:%.*]] = load i32*, i32** [[GEP_VLA]], align 8 // CHECK3-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 // CHECK3-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 // CHECK3-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK3: omp.par.region: -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 1 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: call void @_Z3fooIiEvT_(i32 noundef [[TMP1]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* @global, align 4 -// CHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 1 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[ARRAYIDX1]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[LOADGEP_VLA]], i64 1 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: call void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* @global, align 4 +// CHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[LOADGEP_VLA]], i64 1 +// CHECK3-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4 // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]] @@ -778,6 +783,7 @@ // CHECK3-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_ // CHECK3-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat { // CHECK3-NEXT: entry: +// CHECK3-NEXT: [[STRUCTARG:%.*]] = alloca { i64*, i8*** }, align 8 // CHECK3-NEXT: [[DOTRELOADED:%.*]] = alloca i64, align 8 // CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 // CHECK3-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 @@ -791,7 +797,11 @@ // CHECK3-NEXT: store i64 [[TMP3]], i64* [[DOTRELOADED]], align 8 // CHECK3-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK3: omp_parallel: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64*, i8***)* @_Z5tmainIPPcEiT_..omp_par to void (i32*, i32*, ...)*), i64* [[DOTRELOADED]], i8*** [[ARGC_ADDR]]) +// CHECK3-NEXT: [[GEP__RELOADED:%.*]] = getelementptr { i64*, i8*** }, { i64*, i8*** }* [[STRUCTARG]], i32 0, i32 0 +// CHECK3-NEXT: store i64* [[DOTRELOADED]], i64** [[GEP__RELOADED]], align 8 +// CHECK3-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { i64*, i8*** }, { i64*, i8*** }* [[STRUCTARG]], i32 0, i32 1 +// CHECK3-NEXT: store i8*** [[ARGC_ADDR]], i8**** [[GEP_ARGC_ADDR]], align 8 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i64*, i8*** }*)* @_Z5tmainIPPcEiT_..omp_par to void (i32*, i32*, ...)*), { i64*, i8*** }* [[STRUCTARG]]) // CHECK3-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK3: omp.par.outlined.exit: // CHECK3-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -800,21 +810,25 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_..omp_par -// CHECK3-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i64* [[DOTRELOADED:%.*]], i8*** [[ARGC_ADDR:%.*]]) #[[ATTR1]] { +// CHECK3-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i64*, i8*** }* [[TMP0:%.*]]) #[[ATTR1]] { // CHECK3-NEXT: omp.par.entry: +// CHECK3-NEXT: [[GEP__RELOADED:%.*]] = getelementptr { i64*, i8*** }, { i64*, i8*** }* [[TMP0]], i32 0, i32 0 +// CHECK3-NEXT: [[LOADGEP__RELOADED:%.*]] = load i64*, i64** [[GEP__RELOADED]], align 8 +// CHECK3-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { i64*, i8*** }, { i64*, i8*** }* [[TMP0]], i32 0, i32 1 +// CHECK3-NEXT: [[LOADGEP_ARGC_ADDR:%.*]] = load i8***, i8**** [[GEP_ARGC_ADDR]], align 8 // CHECK3-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 // CHECK3-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTRELOADED]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[LOADGEP__RELOADED]], align 8 // CHECK3-NEXT: [[VAR:%.*]] = alloca double*, align 8 // CHECK3-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK3: omp.par.region: -// CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8 -// CHECK3-NEXT: call void @_Z3fooIPPcEvT_(i8** noundef [[TMP2]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load double*, double** [[VAR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = mul nsw i64 0, [[TMP1]] -// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 [[TMP4]] +// CHECK3-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[LOADGEP_ARGC_ADDR]], align 8 +// CHECK3-NEXT: call void @_Z3fooIPPcEvT_(i8** noundef [[TMP3]]) +// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[VAR]], align 8 +// CHECK3-NEXT: [[TMP5:%.*]] = mul nsw i64 0, [[TMP2]] +// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 [[TMP5]] // CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX2]], i64 0 // CHECK3-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK3: omp.par.pre_finalize: @@ -834,6 +848,7 @@ // CHECK4-LABEL: define {{[^@]+}}@main // CHECK4-SAME: (i32 noundef [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG11:![0-9]+]] { // CHECK4-NEXT: entry: +// CHECK4-NEXT: [[STRUCTARG:%.*]] = alloca { i32* }, align 8 // CHECK4-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[ARGV_ADDR:%.*]] = alloca i8**, align 8 @@ -841,121 +856,133 @@ // CHECK4-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK4-NEXT: store i32 0, i32* [[RETVAL]], align 4 // CHECK4-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18:![0-9]+]] +// CHECK4-NEXT: call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]] // CHECK4-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata i8*** [[ARGV_ADDR]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18]] -// CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4, !dbg [[DBG20:![0-9]+]] -// CHECK4-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG20]] -// CHECK4-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG20]] -// CHECK4-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG20]] -// CHECK4-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16, !dbg [[DBG20]] -// CHECK4-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG20]] -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata i64* [[__VLA_EXPR0]], metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23:![0-9]+]] -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata i32* [[VLA]], metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20]] -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG28:![0-9]+]] +// CHECK4-NEXT: call void @llvm.dbg.declare(metadata i8*** [[ARGV_ADDR]], metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19]] +// CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4, !dbg [[DBG21:![0-9]+]] +// CHECK4-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG21]] +// CHECK4-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG21]] +// CHECK4-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG21]] +// CHECK4-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16, !dbg [[DBG21]] +// CHECK4-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG21]] +// CHECK4-NEXT: call void @llvm.dbg.declare(metadata i64* [[__VLA_EXPR0]], metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]] +// CHECK4-NEXT: call void @llvm.dbg.declare(metadata i32* [[VLA]], metadata [[META25:![0-9]+]], metadata !DIExpression()), !dbg [[DBG21]] +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG29:![0-9]+]] // CHECK4-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK4: omp_parallel: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[VLA]]), !dbg [[DBG29:![0-9]+]] +// CHECK4-NEXT: [[GEP_VLA:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0 +// CHECK4-NEXT: store i32* [[VLA]], i32** [[GEP_VLA]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @main..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]]), !dbg [[DBG30:![0-9]+]] // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK4: omp.par.outlined.exit: // CHECK4-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK4: omp.par.exit.split: -// CHECK4-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8, !dbg [[DBG30:![0-9]+]] -// CHECK4-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(i8** noundef [[TMP3]]), !dbg [[DBG30]] -// CHECK4-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4, !dbg [[DBG30]] -// CHECK4-NEXT: [[TMP4:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG31:![0-9]+]] -// CHECK4-NEXT: call void @llvm.stackrestore(i8* [[TMP4]]), !dbg [[DBG31]] -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[RETVAL]], align 4, !dbg [[DBG31]] -// CHECK4-NEXT: ret i32 [[TMP5]], !dbg [[DBG31]] +// CHECK4-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8, !dbg [[DBG31:![0-9]+]] +// CHECK4-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(i8** noundef [[TMP3]]), !dbg [[DBG31]] +// CHECK4-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4, !dbg [[DBG31]] +// CHECK4-NEXT: [[TMP4:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG32:![0-9]+]] +// CHECK4-NEXT: call void @llvm.stackrestore(i8* [[TMP4]]), !dbg [[DBG32]] +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[RETVAL]], align 4, !dbg [[DBG32]] +// CHECK4-NEXT: ret i32 [[TMP5]], !dbg [[DBG32]] // // // CHECK4-LABEL: define {{[^@]+}}@main..omp_par -// CHECK4-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[VLA:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG32:![0-9]+]] { +// CHECK4-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG33:![0-9]+]] { // CHECK4-NEXT: omp.par.entry: +// CHECK4-NEXT: [[GEP_VLA:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[LOADGEP_VLA:%.*]] = load i32*, i32** [[GEP_VLA]], align 8 // CHECK4-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 // CHECK4-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 1, !dbg [[DBG34:![0-9]+]] -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG34]] -// CHECK4-NEXT: call void @_Z3fooIiEvT_(i32 noundef [[TMP1]]), !dbg [[DBG34]] -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG34]] -// CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 1, !dbg [[DBG34]] -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG34]] -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]], !dbg [[DBG34]] +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[LOADGEP_VLA]], i64 1, !dbg [[DBG35:![0-9]+]] +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG35]] +// CHECK4-NEXT: call void @_Z3fooIiEvT_(i32 noundef [[TMP2]]), !dbg [[DBG35]] +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG35]] +// CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[LOADGEP_VLA]], i64 1, !dbg [[DBG35]] +// CHECK4-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG35]] +// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]], !dbg [[DBG35]] // CHECK4: omp.par.pre_finalize: -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG34]] +// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG35]] // CHECK4: omp.par.outlined.exit.exitStub: // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@_Z3fooIiEvT_ -// CHECK4-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat !dbg [[DBG35:![0-9]+]] { +// CHECK4-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat !dbg [[DBG36:![0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG41:![0-9]+]] -// CHECK4-NEXT: ret void, !dbg [[DBG41]] +// CHECK4-NEXT: call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG42:![0-9]+]] +// CHECK4-NEXT: ret void, !dbg [[DBG42]] // // // CHECK4-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_ -// CHECK4-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat !dbg [[DBG44:![0-9]+]] { +// CHECK4-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat !dbg [[DBG45:![0-9]+]] { // CHECK4-NEXT: entry: +// CHECK4-NEXT: [[STRUCTARG:%.*]] = alloca { i64*, i8*** }, align 8 // CHECK4-NEXT: [[DOTRELOADED:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 // CHECK4-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG50:![0-9]+]] -// CHECK4-NEXT: [[TMP0:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8, !dbg [[DBG51:![0-9]+]] -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP0]], i64 0, !dbg [[DBG51]] -// CHECK4-NEXT: [[TMP1:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8, !dbg [[DBG51]] -// CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 0, !dbg [[DBG51]] -// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1, !dbg [[DBG51]] -// CHECK4-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG51]] -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]), !dbg [[DBG52:![0-9]+]] +// CHECK4-NEXT: call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51:![0-9]+]] +// CHECK4-NEXT: [[TMP0:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8, !dbg [[DBG52:![0-9]+]] +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP0]], i64 0, !dbg [[DBG52]] +// CHECK4-NEXT: [[TMP1:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8, !dbg [[DBG52]] +// CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 0, !dbg [[DBG52]] +// CHECK4-NEXT: [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1, !dbg [[DBG52]] +// CHECK4-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG52]] +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]), !dbg [[DBG53:![0-9]+]] // CHECK4-NEXT: store i64 [[TMP3]], i64* [[DOTRELOADED]], align 8 // CHECK4-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK4: omp_parallel: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64*, i8***)* @_Z5tmainIPPcEiT_..omp_par to void (i32*, i32*, ...)*), i64* [[DOTRELOADED]], i8*** [[ARGC_ADDR]]), !dbg [[DBG53:![0-9]+]] +// CHECK4-NEXT: [[GEP__RELOADED:%.*]] = getelementptr { i64*, i8*** }, { i64*, i8*** }* [[STRUCTARG]], i32 0, i32 0 +// CHECK4-NEXT: store i64* [[DOTRELOADED]], i64** [[GEP__RELOADED]], align 8 +// CHECK4-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { i64*, i8*** }, { i64*, i8*** }* [[STRUCTARG]], i32 0, i32 1 +// CHECK4-NEXT: store i8*** [[ARGC_ADDR]], i8**** [[GEP_ARGC_ADDR]], align 8 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i64*, i8*** }*)* @_Z5tmainIPPcEiT_..omp_par to void (i32*, i32*, ...)*), { i64*, i8*** }* [[STRUCTARG]]), !dbg [[DBG54:![0-9]+]] // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK4: omp.par.outlined.exit: // CHECK4-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK4: omp.par.exit.split: -// CHECK4-NEXT: ret i32 0, !dbg [[DBG55:![0-9]+]] +// CHECK4-NEXT: ret i32 0, !dbg [[DBG56:![0-9]+]] // // // CHECK4-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_..omp_par -// CHECK4-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i64* [[DOTRELOADED:%.*]], i8*** [[ARGC_ADDR:%.*]]) #[[ATTR1]] !dbg [[DBG56:![0-9]+]] { +// CHECK4-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i64*, i8*** }* [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG57:![0-9]+]] { // CHECK4-NEXT: omp.par.entry: +// CHECK4-NEXT: [[GEP__RELOADED:%.*]] = getelementptr { i64*, i8*** }, { i64*, i8*** }* [[TMP0]], i32 0, i32 0 +// CHECK4-NEXT: [[LOADGEP__RELOADED:%.*]] = load i64*, i64** [[GEP__RELOADED]], align 8 +// CHECK4-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { i64*, i8*** }, { i64*, i8*** }* [[TMP0]], i32 0, i32 1 +// CHECK4-NEXT: [[LOADGEP_ARGC_ADDR:%.*]] = load i8***, i8**** [[GEP_ARGC_ADDR]], align 8 // CHECK4-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 // CHECK4-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTRELOADED]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i64, i64* [[LOADGEP__RELOADED]], align 8 // CHECK4-NEXT: [[VAR:%.*]] = alloca double*, align 8 // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: -// CHECK4-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8, !dbg [[DBG57:![0-9]+]] -// CHECK4-NEXT: call void @_Z3fooIPPcEvT_(i8** noundef [[TMP2]]), !dbg [[DBG57]] -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata double** [[VAR]], metadata [[META58:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65:![0-9]+]] -// CHECK4-NEXT: [[TMP3:%.*]] = load double*, double** [[VAR]], align 8, !dbg [[DBG65]] -// CHECK4-NEXT: [[TMP4:%.*]] = mul nsw i64 0, [[TMP1]], !dbg [[DBG65]] -// CHECK4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 [[TMP4]], !dbg [[DBG65]] -// CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX2]], i64 0, !dbg [[DBG65]] -// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]], !dbg [[DBG66:![0-9]+]] +// CHECK4-NEXT: [[TMP3:%.*]] = load i8**, i8*** [[LOADGEP_ARGC_ADDR]], align 8, !dbg [[DBG58:![0-9]+]] +// CHECK4-NEXT: call void @_Z3fooIPPcEvT_(i8** noundef [[TMP3]]), !dbg [[DBG58]] +// CHECK4-NEXT: call void @llvm.dbg.declare(metadata double** [[VAR]], metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG66:![0-9]+]] +// CHECK4-NEXT: [[TMP4:%.*]] = load double*, double** [[VAR]], align 8, !dbg [[DBG66]] +// CHECK4-NEXT: [[TMP5:%.*]] = mul nsw i64 0, [[TMP2]], !dbg [[DBG66]] +// CHECK4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 [[TMP5]], !dbg [[DBG66]] +// CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX2]], i64 0, !dbg [[DBG66]] +// CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]], !dbg [[DBG67:![0-9]+]] // CHECK4: omp.par.pre_finalize: -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG66]] +// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG67]] // CHECK4: omp.par.outlined.exit.exitStub: // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@_Z3fooIPPcEvT_ -// CHECK4-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR5]] comdat !dbg [[DBG67:![0-9]+]] { +// CHECK4-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR5]] comdat !dbg [[DBG68:![0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 // CHECK4-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG71:![0-9]+]] -// CHECK4-NEXT: ret void, !dbg [[DBG71]] -// +// CHECK4-NEXT: call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG72:![0-9]+]] +// CHECK4-NEXT: ret void, !dbg [[DBG72]] // diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -41,10 +41,7 @@ /// Finalize the underlying module, e.g., by outlining regions. /// \param Fn The function to be finalized. If not used, /// all functions are finalized. - /// \param AllowExtractorSinking Flag to include sinking instructions, - /// emitted by CodeExtractor, in the - /// outlined region. Default is false. - void finalize(Function *Fn = nullptr, bool AllowExtractorSinking = false); + void finalize(Function *Fn = nullptr); /// Add attributes known for \p FnID to \p Fn. void addAttributes(omp::RuntimeFunction FnID, Function &Fn); @@ -772,6 +769,7 @@ using PostOutlineCBTy = std::function; PostOutlineCBTy PostOutlineCB; BasicBlock *EntryBB, *ExitBB; + SmallVector ExcludeArgsFromAggregate; /// Collect all blocks in between EntryBB and ExitBB in both the given /// vector and set. diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -174,7 +174,7 @@ void OpenMPIRBuilder::initialize() { initializeTypes(M); } -void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) { +void OpenMPIRBuilder::finalize(Function *Fn) { SmallPtrSet ParallelRegionBlockSet; SmallVector Blocks; SmallVector DeferredOutlines; @@ -193,7 +193,7 @@ Function *OuterFn = OI.getFunction(); CodeExtractorAnalysisCache CEAC(*OuterFn); CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr, - /* AggregateArgs */ false, + /* AggregateArgs */ true, /* BlockFrequencyInfo */ nullptr, /* BranchProbabilityInfo */ nullptr, /* AssumptionCache */ nullptr, @@ -207,6 +207,9 @@ assert(Extractor.isEligible() && "Expected OpenMP outlining to be possible!"); + for (auto *V : OI.ExcludeArgsFromAggregate) + Extractor.excludeArgFromAggregate(V); + Function *OutlinedFn = Extractor.extractCodeRegion(CEAC); LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n"); @@ -225,25 +228,25 @@ BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock(); assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB); assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry); - if (AllowExtractorSinking) { - // Move instructions from the to-be-deleted ArtificialEntry to the entry - // basic block of the parallel region. CodeExtractor may have sunk - // allocas/bitcasts for values that are solely used in the outlined - // region and do not escape. - assert(!ArtificialEntry.empty() && - "Expected instructions to sink in the outlined region"); - for (BasicBlock::iterator It = ArtificialEntry.begin(), - End = ArtificialEntry.end(); - It != End;) { - Instruction &I = *It; - It++; - - if (I.isTerminator()) - continue; - - I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt()); - } + // Move instructions from the to-be-deleted ArtificialEntry to the entry + // basic block of the parallel region. CodeExtractor generates + // instructions to unwrap the aggregate argument and may sink + // allocas/bitcasts for values that are solely used in the outlined region + // and do not escape. + assert(!ArtificialEntry.empty() && + "Expected instructions to add in the outlined region entry"); + for (BasicBlock::reverse_iterator It = ArtificialEntry.rbegin(), + End = ArtificialEntry.rend(); + It != End;) { + Instruction &I = *It; + It++; + + if (I.isTerminator()) + continue; + + I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt()); } + OI.EntryBB->moveBefore(&ArtificialEntry); ArtificialEntry.eraseFromParent(); } @@ -811,8 +814,10 @@ getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num); auto PrivHelper = [&](Value &V) { - if (&V == TIDAddr || &V == ZeroAddr) + if (&V == TIDAddr || &V == ZeroAddr) { + OI.ExcludeArgsFromAggregate.push_back(&V); return; + } SetVector Uses; for (Use &U : V.uses()) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1074,8 +1074,7 @@ BranchInst::Create(AfterBB, AfterIP.getBlock()); // Perform the actual outlining. - OMPInfoCache.OMPBuilder.finalize(OriginalFn, - /* AllowExtractorSinking */ true); + OMPInfoCache.OMPBuilder.finalize(OriginalFn); Function *OutlinedFn = MergableCIs.front()->getCaller(); diff --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll --- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll +++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll @@ -4692,12 +4692,15 @@ ; CHECK1-LABEL: define {{[^@]+}}@merge ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32* }, align 8 ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4708,20 +4711,22 @@ ; ; ; CHECK1-LABEL: define {{[^@]+}}@merge..omp_par -; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0 +; CHECK1-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 ; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK1: omp.par.region: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: -; CHECK1-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK1-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: ; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -4835,12 +4840,15 @@ ; CHECK1-LABEL: define {{[^@]+}}@merge_seq ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32* }, align 8 ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4853,29 +4861,31 @@ ; ; ; CHECK1-LABEL: define {{[^@]+}}@merge_seq..omp_par -; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0 +; CHECK1-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 ; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK1: omp.par.region: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: -; CHECK1-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: -; CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: ; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -4886,9 +4896,9 @@ ; CHECK1: omp_region.body: ; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK1: seq.par.merged: -; CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -; CHECK1-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4 +; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 +; CHECK1-NEXT: store i32 [[ADD]], i32* [[LOADGEP_A_ADDR]], align 4 ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] @@ -4918,6 +4928,7 @@ ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_float ; CHECK1-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr { ; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { float*, float*, float* }, align 8 ; CHECK1-NEXT: [[F_RELOADED:%.*]] = alloca float, align 4 ; CHECK1-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 ; CHECK1-NEXT: store float [[F]], float* [[F_ADDR]], align 4 @@ -4925,7 +4936,13 @@ ; CHECK1-NEXT: store float [[F]], float* [[F_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]]) +; CHECK1-NEXT: [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK1-NEXT: store float* [[F_RELOADED]], float** [[GEP_F_RELOADED]], align 8 +; CHECK1-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 1 +; CHECK1-NEXT: store float* [[F_ADDR]], float** [[GEP_F_ADDR]], align 8 +; CHECK1-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 2 +; CHECK1-NEXT: store float* [[P]], float** [[GEP_P]], align 8 +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float*, float*, float* }*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), { float*, float*, float* }* [[STRUCTARG]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -4936,30 +4953,36 @@ ; ; ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_float..omp_par -; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_RELOADED:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] { +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { float*, float*, float* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 0 +; CHECK1-NEXT: [[LOADGEP_F_RELOADED:%.*]] = load float*, float** [[GEP_F_RELOADED]], align 8 +; CHECK1-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 1 +; CHECK1-NEXT: [[LOADGEP_F_ADDR:%.*]] = load float*, float** [[GEP_F_ADDR]], align 8 +; CHECK1-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 2 +; CHECK1-NEXT: [[LOADGEP_P:%.*]] = load float*, float** [[GEP_P]], align 8 ; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK1-NEXT: [[TMP1:%.*]] = load float, float* [[F_RELOADED]], align 4 +; CHECK1-NEXT: [[TMP2:%.*]] = load float, float* [[LOADGEP_F_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK1: omp.par.region: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: -; CHECK1-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_F_ADDR]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: -; CHECK1-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_F_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: ; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -4970,8 +4993,8 @@ ; CHECK1: omp_region.body: ; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK1: seq.par.merged: -; CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], 0x40091EB860000000 -; CHECK1-NEXT: store float [[ADD]], float* [[P]], align 4 +; CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP2]], 0x40091EB860000000 +; CHECK1-NEXT: store float [[ADD]], float* [[LOADGEP_P]], align 4 ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] @@ -5003,13 +5026,18 @@ ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_firstprivate ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i64* }, align 8 ; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +; CHECK1-NEXT: [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[STRUCTARG]], i32 0, i32 1 +; CHECK1-NEXT: store i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], i64** [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i64* }*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), { i32*, i64* }* [[STRUCTARG]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5022,29 +5050,33 @@ ; ; ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par -; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i64* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[TMP0]], i32 0, i32 0 +; CHECK1-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 +; CHECK1-NEXT: [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[TMP0]], i32 0, i32 1 +; CHECK1-NEXT: [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = load i64*, i64** [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 ; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK1: omp.par.region: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: -; CHECK1-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: -; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 ; CHECK1-NEXT: call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: @@ -5056,11 +5088,11 @@ ; CHECK1: omp_region.body: ; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK1: seq.par.merged: -; CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 -; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -; CHECK1-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4 +; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 +; CHECK1-NEXT: store i32 [[ADD]], i32* [[LOADGEP_A_ADDR]], align 4 ; CHECK1-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64 -; CHECK1-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK1-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] @@ -5090,12 +5122,15 @@ ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_sink_lt ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32* }, align 8 ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5106,30 +5141,32 @@ ; ; ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par -; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK1-NEXT: omp.par.entry: ; CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0 +; CHECK1-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 ; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK1: omp.par.region: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: -; CHECK1-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK1-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; CHECK1-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: -; CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: ; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -5140,12 +5177,12 @@ ; CHECK1: omp_region.body: ; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK1: seq.par.merged: -; CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* -; CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) -; CHECK1-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64 -; CHECK1-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 -; CHECK1-NEXT: store i32 [[TMP5]], i32* [[B]], align 4 -; CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[B]] to i8* +; CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP4]]) +; CHECK1-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[B]] to i64 +; CHECK1-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 +; CHECK1-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 +; CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP4]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] @@ -5175,6 +5212,7 @@ ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_par_use ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8 ; CHECK1-NEXT: [[A_RELOADED:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -5183,9 +5221,15 @@ ; CHECK1-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: +; CHECK1-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK1-NEXT: store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8 +; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1 +; CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +; CHECK1-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2 +; CHECK1-NEXT: store i32* [[B]], i32** [[GEP_B]], align 8 ; CHECK1-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* ; CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]]) +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5198,30 +5242,36 @@ ; ; ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par -; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_RELOADED:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] { +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0 +; CHECK1-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8 +; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1 +; CHECK1-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 +; CHECK1-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2 +; CHECK1-NEXT: [[LOADGEP_B:%.*]] = load i32*, i32** [[GEP_B]], align 8 ; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_RELOADED]], align 4 +; CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK1: omp.par.region: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: -; CHECK1-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: -; CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_B]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: ; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -5232,9 +5282,9 @@ ; CHECK1: omp_region.body: ; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK1: seq.par.merged: -; CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[B]] to i8* -; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -; CHECK1-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +; CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[LOADGEP_B]] to i8* +; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 +; CHECK1-NEXT: store i32 [[ADD]], i32* [[LOADGEP_B]], align 4 ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] @@ -5266,6 +5316,7 @@ ; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions ; CHECK1-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { ; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32* }, align 8 ; CHECK1-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 @@ -5273,7 +5324,11 @@ ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK1-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK1-NEXT: store i32* [[CANCEL1_ADDR]], i32** [[GEP_CANCEL1_ADDR]], align 8 +; CHECK1-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 1 +; CHECK1-NEXT: store i32* [[CANCEL2_ADDR]], i32** [[GEP_CANCEL2_ADDR]], align 8 +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32* }*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), { i32*, i32* }* [[STRUCTARG]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5284,20 +5339,24 @@ ; ; ; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par -; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 0 +; CHECK1-NEXT: [[LOADGEP_CANCEL1_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL1_ADDR]], align 8 +; CHECK1-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 1 +; CHECK1-NEXT: [[LOADGEP_CANCEL2_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL2_ADDR]], align 8 ; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK1: omp.par.region: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: -; CHECK1-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL1_ADDR]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK1-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL2_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: ; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -5340,6 +5399,7 @@ ; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq ; CHECK1-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { ; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8 ; CHECK1-NEXT: [[CANCEL1_RELOADED:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 @@ -5349,7 +5409,13 @@ ; CHECK1-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK1-NEXT: [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK1-NEXT: store i32* [[CANCEL1_RELOADED]], i32** [[GEP_CANCEL1_RELOADED]], align 8 +; CHECK1-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1 +; CHECK1-NEXT: store i32* [[CANCEL1_ADDR]], i32** [[GEP_CANCEL1_ADDR]], align 8 +; CHECK1-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2 +; CHECK1-NEXT: store i32* [[CANCEL2_ADDR]], i32** [[GEP_CANCEL2_ADDR]], align 8 +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5360,30 +5426,36 @@ ; ; ; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par -; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_RELOADED:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0 +; CHECK1-NEXT: [[LOADGEP_CANCEL1_RELOADED:%.*]] = load i32*, i32** [[GEP_CANCEL1_RELOADED]], align 8 +; CHECK1-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1 +; CHECK1-NEXT: [[LOADGEP_CANCEL1_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL1_ADDR]], align 8 +; CHECK1-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2 +; CHECK1-NEXT: [[LOADGEP_CANCEL2_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL2_ADDR]], align 8 ; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CANCEL1_RELOADED]], align 4 +; CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_CANCEL1_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK1: omp.par.region: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: -; CHECK1-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL1_ADDR]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: -; CHECK1-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL2_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: ; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -5394,9 +5466,9 @@ ; CHECK1: omp_region.body: ; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK1: seq.par.merged: -; CHECK1-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK1-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP2]], 0 ; CHECK1-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32 -; CHECK1-NEXT: store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[LNOT_EXT]], i32* [[LOADGEP_CANCEL2_ADDR]], align 4 ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] @@ -5438,12 +5510,15 @@ ; CHECK1-LABEL: define {{[^@]+}}@merge_3 ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32* }, align 8 ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_3..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5454,23 +5529,25 @@ ; ; ; CHECK1-LABEL: define {{[^@]+}}@merge_3..omp_par -; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0 +; CHECK1-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 ; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK1: omp.par.region: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: -; CHECK1-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK1-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) -; CHECK1-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: ; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -5509,6 +5586,7 @@ ; CHECK1-LABEL: define {{[^@]+}}@merge_3_seq ; CHECK1-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr { ; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32*, i32* }, align 8 ; CHECK1-NEXT: [[A_RELOADED:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 @@ -5518,7 +5596,15 @@ ; CHECK1-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK1-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK1-NEXT: store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8 +; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1 +; CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +; CHECK1-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2 +; CHECK1-NEXT: store i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK1-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 3 +; CHECK1-NEXT: store i32* [[ADD1_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32*, i32* }*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32*, i32* }* [[STRUCTARG]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5531,42 +5617,50 @@ ; ; ; CHECK1-LABEL: define {{[^@]+}}@merge_3_seq..omp_par -; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_RELOADED:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0 +; CHECK1-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8 +; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1 +; CHECK1-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 +; CHECK1-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2 +; CHECK1-NEXT: [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK1-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 3 +; CHECK1-NEXT: [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8 ; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_RELOADED]], align 4 +; CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4 ; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK1: omp.par.region: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: -; CHECK1-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK1-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -; CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK1-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK1: omp_region.end: ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split: -; CHECK1-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -; CHECK1-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -; CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] +; CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK1-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +; CHECK1-NEXT: br i1 [[TMP6]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] ; CHECK1: omp_region.end4: ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split.split.split: -; CHECK1-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: ; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -5577,9 +5671,9 @@ ; CHECK1: omp_region.body5: ; CHECK1-NEXT: br label [[SEQ_PAR_MERGED2:%.*]] ; CHECK1: seq.par.merged2: -; CHECK1-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 -; CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP1]] -; CHECK1-NEXT: store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK1-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK1-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP2]] +; CHECK1-NEXT: store i32 [[ADD1]], i32* [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK1: omp.par.merged.split.split.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] @@ -5589,8 +5683,8 @@ ; CHECK1: omp_region.body: ; CHECK1-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK1: seq.par.merged: -; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -; CHECK1-NEXT: store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 +; CHECK1-NEXT: store i32 [[ADD]], i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK1-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK1: omp.par.merged.split: ; CHECK1-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] @@ -5739,12 +5833,15 @@ ; CHECK1-LABEL: define {{[^@]+}}@merge_2_unmergable_1 ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK1-NEXT: entry: +; CHECK1-NEXT: [[STRUCTARG:%.*]] = alloca { i32* }, align 8 ; CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK1: omp_parallel: -; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK1-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +; CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]]) ; CHECK1-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK1: omp.par.outlined.exit: ; CHECK1-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5757,20 +5854,22 @@ ; ; ; CHECK1-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par -; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK1-NEXT: omp.par.entry: +; CHECK1-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0 +; CHECK1-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 ; CHECK1-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK1-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK1-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK1-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK1-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK1: omp.par.region: ; CHECK1-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK1: omp.par.merged: -; CHECK1-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK1-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK1-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK1-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK1-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK1: entry.split: ; CHECK1-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -5809,12 +5908,15 @@ ; CHECK2-LABEL: define {{[^@]+}}@merge ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32* }, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5825,20 +5927,22 @@ ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK2-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -5952,12 +6056,15 @@ ; CHECK2-LABEL: define {{[^@]+}}@merge_seq ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32* }, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -5970,29 +6077,31 @@ ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_seq..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: -; CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -6003,9 +6112,9 @@ ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: -; CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -; CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4 +; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 +; CHECK2-NEXT: store i32 [[ADD]], i32* [[LOADGEP_A_ADDR]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] @@ -6035,6 +6144,7 @@ ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float ; CHECK2-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { float*, float*, float* }, align 8 ; CHECK2-NEXT: [[F_RELOADED:%.*]] = alloca float, align 4 ; CHECK2-NEXT: [[F_ADDR:%.*]] = alloca float, align 4 ; CHECK2-NEXT: store float [[F]], float* [[F_ADDR]], align 4 @@ -6042,7 +6152,13 @@ ; CHECK2-NEXT: store float [[F]], float* [[F_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]]) +; CHECK2-NEXT: [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store float* [[F_RELOADED]], float** [[GEP_F_RELOADED]], align 8 +; CHECK2-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 1 +; CHECK2-NEXT: store float* [[F_ADDR]], float** [[GEP_F_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 2 +; CHECK2-NEXT: store float* [[P]], float** [[GEP_P]], align 8 +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float*, float*, float* }*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), { float*, float*, float* }* [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6053,30 +6169,36 @@ ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_RELOADED:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { float*, float*, float* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_F_RELOADED:%.*]] = load float*, float** [[GEP_F_RELOADED]], align 8 +; CHECK2-NEXT: [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 1 +; CHECK2-NEXT: [[LOADGEP_F_ADDR:%.*]] = load float*, float** [[GEP_F_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 2 +; CHECK2-NEXT: [[LOADGEP_P:%.*]] = load float*, float** [[GEP_P]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TMP1:%.*]] = load float, float* [[F_RELOADED]], align 4 +; CHECK2-NEXT: [[TMP2:%.*]] = load float, float* [[LOADGEP_F_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_F_ADDR]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: -; CHECK2-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_F_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -6087,8 +6209,8 @@ ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: -; CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], 0x40091EB860000000 -; CHECK2-NEXT: store float [[ADD]], float* [[P]], align 4 +; CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP2]], 0x40091EB860000000 +; CHECK2-NEXT: store float [[ADD]], float* [[LOADGEP_P]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] @@ -6120,13 +6242,18 @@ ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_firstprivate ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i64* }, align 8 ; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]]) +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[STRUCTARG]], i32 0, i32 1 +; CHECK2-NEXT: store i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], i64** [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i64* }*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), { i32*, i64* }* [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6139,29 +6266,33 @@ ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i64* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[TMP0]], i32 0, i32 1 +; CHECK2-NEXT: [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = load i64*, i64** [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: -; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 ; CHECK2-NEXT: call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: @@ -6173,11 +6304,11 @@ ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: -; CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 -; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 -; CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +; CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4 +; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 +; CHECK2-NEXT: store i32 [[ADD]], i32* [[LOADGEP_A_ADDR]], align 4 ; CHECK2-NEXT: [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64 -; CHECK2-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8 ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] @@ -6207,12 +6338,15 @@ ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_sink_lt ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32* }, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6223,30 +6357,32 @@ ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: ; CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK2-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; CHECK2-NEXT: br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: -; CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -6257,12 +6393,12 @@ ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: -; CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[B]] to i8* -; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) -; CHECK2-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64 -; CHECK2-NEXT: [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32 -; CHECK2-NEXT: store i32 [[TMP5]], i32* [[B]], align 4 -; CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]]) +; CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[B]] to i8* +; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP4]]) +; CHECK2-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[B]] to i64 +; CHECK2-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32 +; CHECK2-NEXT: store i32 [[TMP6]], i32* [[B]], align 4 +; CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP4]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] @@ -6292,6 +6428,7 @@ ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8 ; CHECK2-NEXT: [[A_RELOADED:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 @@ -6300,9 +6437,15 @@ ; CHECK2-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: +; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8 +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1 +; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2 +; CHECK2-NEXT: store i32* [[B]], i32** [[GEP_B]], align 8 ; CHECK2-NEXT: [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8* ; CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]]) -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]]) +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6315,30 +6458,36 @@ ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_RELOADED:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8 +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2 +; CHECK2-NEXT: [[LOADGEP_B:%.*]] = load i32*, i32** [[GEP_B]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_RELOADED]], align 4 +; CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: -; CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]]) +; CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_B]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -6349,9 +6498,9 @@ ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: -; CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[B]] to i8* -; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -; CHECK2-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +; CHECK2-NEXT: [[TMP5:%.*]] = bitcast i32* [[LOADGEP_B]] to i8* +; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 +; CHECK2-NEXT: store i32 [[ADD]], i32* [[LOADGEP_B]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] @@ -6383,6 +6532,7 @@ ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions ; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32* }, align 8 ; CHECK2-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4 @@ -6390,7 +6540,11 @@ ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store i32* [[CANCEL1_ADDR]], i32** [[GEP_CANCEL1_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 1 +; CHECK2-NEXT: store i32* [[CANCEL2_ADDR]], i32** [[GEP_CANCEL2_ADDR]], align 8 +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32* }*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), { i32*, i32* }* [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6401,20 +6555,24 @@ ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_CANCEL1_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL1_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 1 +; CHECK2-NEXT: [[LOADGEP_CANCEL2_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL2_ADDR]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL1_ADDR]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK2-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL2_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -6457,6 +6615,7 @@ ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq ; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8 ; CHECK2-NEXT: [[CANCEL1_RELOADED:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[CANCEL1_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[CANCEL2_ADDR:%.*]] = alloca i32, align 4 @@ -6466,7 +6625,13 @@ ; CHECK2-NEXT: store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]]) +; CHECK2-NEXT: [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store i32* [[CANCEL1_RELOADED]], i32** [[GEP_CANCEL1_RELOADED]], align 8 +; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1 +; CHECK2-NEXT: store i32* [[CANCEL1_ADDR]], i32** [[GEP_CANCEL1_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2 +; CHECK2-NEXT: store i32* [[CANCEL2_ADDR]], i32** [[GEP_CANCEL2_ADDR]], align 8 +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6477,30 +6642,36 @@ ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_RELOADED:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_CANCEL1_RELOADED:%.*]] = load i32*, i32** [[GEP_CANCEL1_RELOADED]], align 8 +; CHECK2-NEXT: [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1 +; CHECK2-NEXT: [[LOADGEP_CANCEL1_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL1_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2 +; CHECK2-NEXT: [[LOADGEP_CANCEL2_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL2_ADDR]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CANCEL1_RELOADED]], align 4 +; CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_CANCEL1_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL1_ADDR]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: -; CHECK2-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL2_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -6511,9 +6682,9 @@ ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: -; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK2-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP2]], 0 ; CHECK2-NEXT: [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32 -; CHECK2-NEXT: store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[LNOT_EXT]], i32* [[LOADGEP_CANCEL2_ADDR]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] @@ -6555,12 +6726,15 @@ ; CHECK2-LABEL: define {{[^@]+}}@merge_3 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32* }, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_3..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6571,23 +6745,25 @@ ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_3..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK2-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) -; CHECK2-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -6626,6 +6802,7 @@ ; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq ; CHECK2-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32*, i32* }, align 8 ; CHECK2-NEXT: [[A_RELOADED:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4 @@ -6635,7 +6812,15 @@ ; CHECK2-NEXT: store i32 [[A]], i32* [[A_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]]) +; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8 +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1 +; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2 +; CHECK2-NEXT: store i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 3 +; CHECK2-NEXT: store i32* [[ADD1_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32*, i32* }*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32*, i32* }* [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6648,42 +6833,50 @@ ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_RELOADED:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8 +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2 +; CHECK2-NEXT: [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8 +; CHECK2-NEXT: [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 3 +; CHECK2-NEXT: [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 -; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_RELOADED]], align 4 +; CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK2-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -; CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] +; CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) +; CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +; CHECK2-NEXT: br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]] ; CHECK2: omp_region.end: ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split: -; CHECK2-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -; CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) -; CHECK2-NEXT: [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0 -; CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] +; CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]]) +; CHECK2-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +; CHECK2-NEXT: br i1 [[TMP6]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]] ; CHECK2: omp_region.end4: ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split.split.split: -; CHECK2-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] @@ -6694,9 +6887,9 @@ ; CHECK2: omp_region.body5: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED2:%.*]] ; CHECK2: seq.par.merged2: -; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 -; CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP1]] -; CHECK2-NEXT: store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK2-NEXT: [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP2]] +; CHECK2-NEXT: store i32 [[ADD1]], i32* [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]] ; CHECK2: omp.par.merged.split.split.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY5_SPLIT:%.*]] @@ -6706,8 +6899,8 @@ ; CHECK2: omp_region.body: ; CHECK2-NEXT: br label [[SEQ_PAR_MERGED:%.*]] ; CHECK2: seq.par.merged: -; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -; CHECK2-NEXT: store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4 +; CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 +; CHECK2-NEXT: store i32 [[ADD]], i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_MERGED_SPLIT:%.*]] ; CHECK2: omp.par.merged.split: ; CHECK2-NEXT: br label [[OMP_REGION_BODY_SPLIT:%.*]] @@ -6856,12 +7049,15 @@ ; CHECK2-LABEL: define {{[^@]+}}@merge_2_unmergable_1 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr { ; CHECK2-NEXT: entry: +; CHECK2-NEXT: [[STRUCTARG:%.*]] = alloca { i32* }, align 8 ; CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: br label [[OMP_PARALLEL:%.*]] ; CHECK2: omp_parallel: -; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]]) +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0 +; CHECK2-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 +; CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]]) ; CHECK2-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] ; CHECK2: omp.par.outlined.exit: ; CHECK2-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -6874,20 +7070,22 @@ ; ; ; CHECK2-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par -; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] { +; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] { ; CHECK2-NEXT: omp.par.entry: +; CHECK2-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0 +; CHECK2-NEXT: [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8 ; CHECK2-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 -; CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4 -; CHECK2-NEXT: store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4 +; CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4 +; CHECK2-NEXT: store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4 ; CHECK2-NEXT: br label [[OMP_PAR_REGION:%.*]] ; CHECK2: omp.par.region: ; CHECK2-NEXT: br label [[OMP_PAR_MERGED:%.*]] ; CHECK2: omp.par.merged: -; CHECK2-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) ; CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -; CHECK2-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]]) +; CHECK2-NEXT: call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]]) ; CHECK2-NEXT: br label [[ENTRY_SPLIT:%.*]] ; CHECK2: entry.split: ; CHECK2-NEXT: br label [[OMP_PAR_REGION_SPLIT:%.*]] diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -207,6 +207,64 @@ return Store->getValueOperand(); } +// Returns the value stored in the aggregate argument of an outlined function, +// or nullptr if it is not found. +static Value *findStoredValueInAggregateAt(LLVMContext &Ctx, Value *Aggregate, + unsigned Idx) { + GetElementPtrInst *GEPAtIdx = nullptr; + // Find GEP instruction at that index. + for (User *Usr : Aggregate->users()) { + GetElementPtrInst *GEP = dyn_cast(Usr); + if (!GEP) + continue; + + if (GEP->getOperand(2) != ConstantInt::get(Type::getInt32Ty(Ctx), Idx)) + continue; + + EXPECT_EQ(GEPAtIdx, nullptr); + GEPAtIdx = GEP; + } + + EXPECT_NE(GEPAtIdx, nullptr); + EXPECT_EQ(GEPAtIdx->getNumUses(), 1U); + + // Find the value stored to the aggregate. + StoreInst *StoreToAgg = dyn_cast(*GEPAtIdx->user_begin()); + Value *StoredAggValue = StoreToAgg->getValueOperand(); + + Value *StoredValue = nullptr; + + // Find the value stored to the value stored in the aggregate. + for (User *Usr : StoredAggValue->users()) { + StoreInst *Store = dyn_cast(Usr); + if (!Store) + continue; + + if (Store->getPointerOperand() != StoredAggValue) + continue; + + EXPECT_EQ(StoredValue, nullptr); + StoredValue = Store->getValueOperand(); + } + + return StoredValue; +} + +// Returns the aggregate that the value is originating from. +static Value *findAggregateFromValue(Value *V) { + // Expects a load instruction that loads from the aggregate. + LoadInst *Load = dyn_cast(V); + EXPECT_NE(Load, nullptr); + // Find the GEP instruction used in the load instruction. + GetElementPtrInst *GEP = + dyn_cast(Load->getPointerOperand()); + EXPECT_NE(GEP, nullptr); + // Find the aggregate used in the GEP instruction. + Value *Aggregate = GEP->getPointerOperand(); + + return Aggregate; +} + TEST_F(OpenMPIRBuilderTest, CreateBarrier) { OpenMPIRBuilder OMPBuilder(*M); OMPBuilder.initialize(); @@ -581,8 +639,9 @@ EXPECT_EQ(ForkCI->getArgOperand(1), ConstantInt::get(Type::getInt32Ty(Ctx), 1U)); EXPECT_EQ(ForkCI->getArgOperand(2), Usr); - EXPECT_EQ(findStoredValue(ForkCI->getArgOperand(3)), - F->arg_begin()); + Value *StoredValue = + findStoredValueInAggregateAt(Ctx, ForkCI->getArgOperand(3), 0); + EXPECT_EQ(StoredValue, F->arg_begin()); } TEST_F(OpenMPIRBuilderTest, ParallelNested) { @@ -906,7 +965,8 @@ EXPECT_TRUE(isa(ForkCI->getArgOperand(0))); EXPECT_EQ(ForkCI->getArgOperand(1), ConstantInt::get(Type::getInt32Ty(Ctx), 1)); - Value *StoredForkArg = findStoredValue(ForkCI->getArgOperand(3)); + Value *StoredForkArg = + findStoredValueInAggregateAt(Ctx, ForkCI->getArgOperand(3), 0); EXPECT_EQ(StoredForkArg, F->arg_begin()); EXPECT_EQ(DirectCI->getCalledFunction(), OutlinedFn); @@ -914,7 +974,7 @@ EXPECT_TRUE(isa(DirectCI->getArgOperand(0))); EXPECT_TRUE(isa(DirectCI->getArgOperand(1))); Value *StoredDirectArg = - findStoredValue(DirectCI->getArgOperand(2)); + findStoredValueInAggregateAt(Ctx, DirectCI->getArgOperand(2), 0); EXPECT_EQ(StoredDirectArg, F->arg_begin()); } @@ -1045,6 +1105,8 @@ Type *I32PtrTy = Type::getInt32PtrTy(M->getContext()); Type *StructTy = StructType::get(I32Ty, I32PtrTy); Type *StructPtrTy = StructTy->getPointerTo(); + StructType *ArgStructTy = + StructType::get(I32PtrTy, StructPtrTy, I32PtrTy, StructPtrTy); Type *VoidTy = Type::getVoidTy(M->getContext()); FunctionCallee RetI32Func = M->getOrInsertFunction("ret_i32", I32Ty); FunctionCallee TakeI32Func = @@ -1096,21 +1158,7 @@ Type *Arg2Type = OutlinedFn->getArg(2)->getType(); EXPECT_TRUE(Arg2Type->isPointerTy()); - EXPECT_TRUE(cast(Arg2Type)->isOpaqueOrPointeeTypeMatches(I32Ty)); - - // Arguments that need to be passed through pointers and reloaded will get - // used earlier in the functions and therefore will appear first in the - // argument list after outlining. - Type *Arg3Type = OutlinedFn->getArg(3)->getType(); - EXPECT_TRUE(Arg3Type->isPointerTy()); - EXPECT_TRUE( - cast(Arg3Type)->isOpaqueOrPointeeTypeMatches(StructTy)); - - Type *Arg4Type = OutlinedFn->getArg(4)->getType(); - EXPECT_EQ(Arg4Type, I32PtrTy); - - Type *Arg5Type = OutlinedFn->getArg(5)->getType(); - EXPECT_EQ(Arg5Type, StructPtrTy); + EXPECT_EQ(Arg2Type->getPointerElementType(), ArgStructTy); } TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) { @@ -3031,7 +3079,7 @@ return false; return Store->getPointerOperand() == GlobalLoad->getPointerOperand() && - isa(GlobalLoad->getPointerOperand()); + isa(findAggregateFromValue(GlobalLoad->getPointerOperand())); } /// Finds among users of Ptr a pair of GEP instructions with indices [0, 0] and @@ -3328,9 +3376,11 @@ auto *SecondAtomic = findSingleUserInBlock(SecondLoad, AtomicBB); ASSERT_NE(FirstAtomic, nullptr); - EXPECT_TRUE(isa(FirstAtomic->getPointerOperand())); + Value *AtomicStorePointer = FirstAtomic->getPointerOperand(); + EXPECT_TRUE(isa(findAggregateFromValue(AtomicStorePointer))); ASSERT_NE(SecondAtomic, nullptr); - EXPECT_TRUE(isa(SecondAtomic->getPointerOperand())); + AtomicStorePointer = SecondAtomic->getPointerOperand(); + EXPECT_TRUE(isa(findAggregateFromValue(AtomicStorePointer))); // Check that the separate reduction function also performs (non-atomic) // reductions after extracting reduction variables from its arguments.