diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp @@ -448,7 +448,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP4]] // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]] @@ -470,7 +470,7 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 diff --git a/clang/test/OpenMP/for_reduction_task_codegen.cpp b/clang/test/OpenMP/for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/for_reduction_task_codegen.cpp @@ -19,22 +19,6 @@ } } - - -// Init firstprivate copy of argc - -// Init firstprivate copy of argv[0:10][0:argc] - -// Register task reduction. - - - - - - - - - #endif // CHECK1-LABEL: define {{[^@]+}}@main // CHECK1-SAME: (i32 noundef [[ARGC:%.*]], ptr noundef [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] { @@ -361,7 +345,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP4]] // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]] @@ -383,7 +367,7 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 diff --git a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp @@ -356,7 +356,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP4]] // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]] @@ -378,7 +378,7 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 diff --git a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp --- a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp @@ -311,7 +311,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP4]] // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]] @@ -333,7 +333,7 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 diff --git a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp --- a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp @@ -302,7 +302,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP4]] // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]] @@ -324,7 +324,7 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 diff --git a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp --- a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp @@ -344,7 +344,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP4]] // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]] @@ -366,7 +366,7 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 diff --git a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp --- a/clang/test/OpenMP/reduction_implicit_map.cpp +++ b/clang/test/OpenMP/reduction_implicit_map.cpp @@ -704,7 +704,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr double, ptr [[TMP2]], i64 [[TMP4]] // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]] @@ -726,7 +726,7 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 diff --git a/clang/test/OpenMP/sections_reduction_task_codegen.cpp b/clang/test/OpenMP/sections_reduction_task_codegen.cpp --- a/clang/test/OpenMP/sections_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/sections_reduction_task_codegen.cpp @@ -349,7 +349,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP4]] // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]] @@ -371,7 +371,7 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 diff --git a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp @@ -369,7 +369,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP4]] // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]] @@ -391,7 +391,7 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 diff --git a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp --- a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp @@ -315,7 +315,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP4]] // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]] @@ -337,7 +337,7 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -344,7 +344,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP4]] // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]] @@ -366,7 +366,7 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 @@ -708,7 +708,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP4]] // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]] @@ -730,7 +730,7 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 diff --git a/clang/test/OpenMP/taskloop_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_reduction_codegen.cpp --- a/clang/test/OpenMP/taskloop_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_reduction_codegen.cpp @@ -175,13 +175,13 @@ // CHECK: call void @llvm.memcpy.p0.p0.i64( // CHECK: define internal void @[[RED_FINI2]](ptr noundef %0) -// CHECK: [[RED_SIZE1_ADDR:%.+]] = call ptr @llvm.threadlocal.address.p0(ptr [[RED_SIZE1]] +// CHECK: [[RED_SIZE1_ADDR:%.+]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 [[RED_SIZE1]] // CHECK: load i64, ptr [[RED_SIZE1_ADDR]] // CHECK: call void @ // CHECK: ret void // CHECK: define internal void @[[RED_COMB2]](ptr noundef %0, ptr noundef %1) -// CHECK: [[RED_SIZE1_ADDR2:%.+]] = call ptr @llvm.threadlocal.address.p0(ptr [[RED_SIZE1]] +// CHECK: [[RED_SIZE1_ADDR2:%.+]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 [[RED_SIZE1]] // CHECK: load i64, ptr [[RED_SIZE1_ADDR2]] // CHECK: call void [[OMP_COMB1]]( // CHECK: ret void @@ -196,13 +196,13 @@ // CHECK: ret void // CHECK: define internal void @[[RED_INIT4]](ptr noalias noundef %{{.+}}, ptr noalias noundef %{{.+}}) -// CHECK: [[RED_SIZE2_ADDR:%.+]] = call ptr @llvm.threadlocal.address.p0(ptr [[RED_SIZE2]] +// CHECK: [[RED_SIZE2_ADDR:%.+]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 [[RED_SIZE2]] // CHECK: load i64, ptr [[RED_SIZE2_ADDR]] // CHECK: store float 0.000000e+00, ptr % // CHECK: ret void // CHECK: define internal void @[[RED_COMB4]](ptr noundef %0, ptr noundef %1) -// CHECK: [[RED_SIZE2_ADDR2:%.+]] = call ptr @llvm.threadlocal.address.p0(ptr [[RED_SIZE2]] +// CHECK: [[RED_SIZE2_ADDR2:%.+]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 [[RED_SIZE2]] // CHECK: load i64, ptr [[RED_SIZE2_ADDR2]] // CHECK: fadd float % // CHECK: store float %{{.+}}, ptr % diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp --- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -348,7 +348,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP4]] // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]] @@ -370,7 +370,7 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 @@ -712,7 +712,7 @@ // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP3]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i64 [[TMP4]] // CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq ptr [[TMP2]], [[TMP5]] @@ -734,7 +734,7 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 // CHECK1-NEXT: store ptr [[TMP1]], ptr [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.threadlocal.address.p0(ptr @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP2:%.*]] = call align 8 ptr @llvm.threadlocal.address.p0(ptr align 8 @{{reduction_size[.].+[.]}}) // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR1]], align 8 diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -4225,10 +4225,12 @@ // variable for possibly changing that to internal or private, or maybe // create different versions of the function for different OMP internal // variables. - Elem.second = new GlobalVariable( + auto *GV = new GlobalVariable( M, Ty, /*IsConstant=*/false, GlobalValue::CommonLinkage, Constant::getNullValue(Ty), Elem.first(), /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal, AddressSpace); + GV->setAlignment(M.getDataLayout().getABITypeAlign(Ty)); + Elem.second = GV; } return cast(&*Elem.second);