Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -1730,6 +1730,10 @@ LValueBaseInfo BaseInfo, TBAAAccessInfo TBAAInfo, bool isNontemporal) { + if (auto *GV = dyn_cast(Addr.getPointer())) + if (GV->isThreadLocal()) + Addr = Addr.withPointer(Builder.CreateThreadLocalAddress(GV)); + if (const auto *ClangVecTy = Ty->getAs()) { // Boolean vectors use `iN` as storage type. if (ClangVecTy->isExtVectorBoolType()) { @@ -1871,6 +1875,10 @@ LValueBaseInfo BaseInfo, TBAAAccessInfo TBAAInfo, bool isInit, bool isNontemporal) { + if (auto *GV = dyn_cast(Addr.getPointer())) + if (GV->isThreadLocal()) + Addr = Addr.withPointer(Builder.CreateThreadLocalAddress(GV)); + llvm::Type *SrcTy = Value->getType(); if (const auto *ClangVecTy = Ty->getAs()) { auto *VecTy = dyn_cast(SrcTy); @@ -2601,6 +2609,10 @@ } llvm::Value *V = CGF.CGM.GetAddrOfGlobalVar(VD); + + if (VD->getTLSKind() != VarDecl::TLS_None) + V = CGF.Builder.CreateThreadLocalAddress(V); + llvm::Type *RealVarTy = CGF.getTypes().ConvertTypeForMem(VD->getType()); V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy); CharUnits Alignment = CGF.getContext().getDeclAlign(VD); @@ -2870,6 +2882,11 @@ llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?"); } + // Handle threadlocal function locals. + if (VD->getTLSKind() != VarDecl::TLS_None) { + auto *var = Builder.CreateThreadLocalAddress(addr.getPointer()); + addr = Address(var, addr.getElementType(), addr.getAlignment()); + } // Check for OpenMP threadprivate variables. if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd && Index: clang/lib/CodeGen/ItaniumCXXABI.cpp =================================================================== --- clang/lib/CodeGen/ItaniumCXXABI.cpp +++ clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -2984,14 +2984,16 @@ // For a reference, the result of the wrapper function is a pointer to // the referenced object. - llvm::Value *Val = Var; + llvm::Value *Val = Builder.CreateThreadLocalAddress(Var); + if (VD->getType()->isReferenceType()) { CharUnits Align = CGM.getContext().getDeclAlign(VD); - Val = Builder.CreateAlignedLoad(Var->getValueType(), Var, Align); + Val = Builder.CreateAlignedLoad(Var->getValueType(), Val, Align); } if (Val->getType() != Wrapper->getReturnType()) Val = Builder.CreatePointerBitCastOrAddrSpaceCast( Val, Wrapper->getReturnType(), ""); + Builder.CreateRet(Val); } } Index: clang/test/CodeGenCXX/cxx11-thread-local-instantiated.cpp =================================================================== --- clang/test/CodeGenCXX/cxx11-thread-local-instantiated.cpp +++ clang/test/CodeGenCXX/cxx11-thread-local-instantiated.cpp @@ -17,7 +17,8 @@ // CHECK-LABEL: define weak_odr hidden {{.*}} @_ZTWN3TLSI1SE5mDataE() {{.*}} comdat { // CHECK: call void @_ZTHN3TLSI1SE5mDataE() -// CHECK: ret {{.*}} @_ZN3TLSI1SE5mDataE +// CHECK: [[TLSmData_ADDR:%[^ ]+]] = call ptr @llvm.threadlocal.address.p0(ptr @_ZN3TLSI1SE5mDataE) +// CHECK: ret {{.*}} [[TLSmData_ADDR]] // Unlike for a global, the global initialization function must not be in a // COMDAT with the variable, because it is referenced from the _ZTH function Index: clang/test/CodeGenCXX/cxx11-thread-local-reference.cpp =================================================================== --- clang/test/CodeGenCXX/cxx11-thread-local-reference.cpp +++ clang/test/CodeGenCXX/cxx11-thread-local-reference.cpp @@ -15,7 +15,8 @@ // CHECK: define {{.*}} @[[R_INIT:.*]]() // CHECK: call noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) i32* @_Z1fv() -// CHECK: store i32* %{{.*}}, i32** @r, align 8 +// CHECK: %[[R_ADDR:.+]] = call i32** @llvm.threadlocal.address.p0p0i32(i32** @r) +// CHECK: store i32* %{{.*}}, i32** %[[R_ADDR]], align 8 // CHECK-LABEL: define{{.*}} nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) i32* @_Z1gv() // LINUX_AIX: call i32* @_ZTW1r() @@ -26,7 +27,8 @@ // DARWIN: define cxx_fast_tlscc noundef i32* @_ZTW1r() [[ATTR1:#[0-9]+]] { // LINUX_AIX: call void @_ZTH1r() // DARWIN: call cxx_fast_tlscc void @_ZTH1r() -// CHECK: load i32*, i32** @r, align 8 +// CHECK: %[[R_ADDR2:.+]] = call i32** @llvm.threadlocal.address.p0p0i32(i32** @r) +// CHECK: load i32*, i32** %[[R_ADDR2]], align 8 // CHECK: ret i32* %{{.*}} // LINUX_AIX-LABEL: define internal void @__tls_init() Index: clang/test/CodeGenCXX/cxx11-thread-local.cpp =================================================================== --- clang/test/CodeGenCXX/cxx11-thread-local.cpp +++ clang/test/CodeGenCXX/cxx11-thread-local.cpp @@ -108,7 +108,8 @@ // CHECK: define {{.*}} @[[A_INIT:.*]]() // CHECK: call{{.*}} i32 @_Z1fv() -// CHECK-NEXT: store i32 {{.*}}, i32* @a, align 4 +// CHECK: [[A_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @a) +// CHECK-NEXT: store i32 {{.*}}, i32* [[A_ADDR]], align 4 // CHECK-LABEL: define{{.*}} i32 @_Z1fv() int f() { @@ -117,12 +118,14 @@ // CHECK: br i1 %[[NEED_INIT]]{{.*}} // CHECK: %[[CALL:.*]] = call{{.*}} i32 @_Z1gv() - // CHECK: store i32 %[[CALL]], i32* @_ZZ1fvE1n, align 4 + // CHECK: [[N_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ1fvE1n) + // CHECK: store i32 %[[CALL]], i32* [[N_ADDR]], align 4 // CHECK: store i8 1, i8* @_ZGVZ1fvE1n // CHECK: br label static thread_local int n = g(); - - // CHECK: load i32, i32* @_ZZ1fvE1n, align 4 + + // CHECK: [[N_ADDR2:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ1fvE1n) + // CHECK: load i32, i32* [[N_ADDR2]], align 4 return n; } @@ -140,17 +143,20 @@ // LINUX: br label // AIX-NOT: br label // finally: -// LINUX_AIX: ret i32* @b +// LINUX_AIX: [[B_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @b) +// LINUX_AIX: ret i32* [[B_ADDR]] // DARWIN-LABEL: declare cxx_fast_tlscc noundef i32* @_ZTW1b() // There is no definition of the thread wrapper on Darwin for external TLV. // CHECK: define {{.*}} @[[D_INIT:.*]]() // CHECK: call{{.*}} i32 @_Z1gv() -// CHECK-NEXT: store i32 %{{.*}}, i32* @_ZL1d, align 4 +// CHECK-NEXT: [[D_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZL1d) +// CHECK-NEXT: store i32 %{{.*}}, i32* [[D_ADDR]], align 4 // CHECK: define {{.*}} @[[U_M_INIT:.*]]() // CHECK: call{{.*}} i32 @_Z1fv() -// CHECK-NEXT: store i32 %{{.*}}, i32* @_ZN1U1mE, align 4 +// CHECK-NEXT: [[UM_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN1U1mE) +// CHECK-NEXT: store i32 %{{.*}}, i32* [[UM_ADDR]], align 4 // CHECK: define {{.*}} @[[E_INIT:.*]]() // LINUX_AIX: call i32* @_ZTWN1VIiE1mE() @@ -164,18 +170,21 @@ // DARWIN-LABEL: define weak_odr hidden cxx_fast_tlscc noundef i32* @_ZTWN1VIiE1mE() // LINUX_AIX: call void @_ZTHN1VIiE1mE() // DARWIN: call cxx_fast_tlscc void @_ZTHN1VIiE1mE() -// CHECK: ret i32* @_ZN1VIiE1mE +// CHECK: [[VM_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN1VIiE1mE) +// CHECK: ret i32* [[VM_ADDR]] // LINUX_AIX-LABEL: define weak_odr hidden noundef i32* @_ZTWN1WIiE1mE() // DARWIN-LABEL: define weak_odr hidden cxx_fast_tlscc noundef i32* @_ZTWN1WIiE1mE() // CHECK-NOT: call -// CHECK: ret i32* @_ZN1WIiE1mE +// CHECK: [[WM_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN1WIiE1mE) +// CHECK: ret i32* [[WM_ADDR]] // LINUX_AIX-LABEL: define weak_odr hidden {{.*}}* @_ZTWN1XIiE1mE() // DARWIN-LABEL: define weak_odr hidden cxx_fast_tlscc {{.*}}* @_ZTWN1XIiE1mE() // LINUX_AIX: call void @_ZTHN1XIiE1mE() // DARWIN: call cxx_fast_tlscc void @_ZTHN1XIiE1mE() -// CHECK: ret {{.*}}* @_ZN1XIiE1mE +// CHECK: [[XM_ADDR:%.+]] = call %struct.Dtor* @llvm.threadlocal.address.p0s_struct.Dtors(%struct.Dtor* @_ZN1XIiE1mE) +// CHECK: ret {{.*}}* [[XM_ADDR]] // LINUX_AIX: define internal void @[[VF_M_INIT]]() // DARWIN: define internal cxx_fast_tlscc void @[[VF_M_INIT]]() @@ -185,7 +194,8 @@ // CHECK: br i1 %[[VF_M_INITIALIZED]], // need init: // CHECK: call{{.*}} i32 @_Z1gv() -// CHECK: store i32 %{{.*}}, i32* @_ZN1VIfE1mE, align 4 +// CHECK: [[VFM_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN1VIfE1mE) +// CHECK: store i32 %{{.*}}, i32* [[VFM_ADDR]], align 4 // CHECK: store i8 1, i8* bitcast (i64* @_ZGVN1VIfE1mE to i8*) // CHECK: br label @@ -212,21 +222,24 @@ // LINUX: br i1 icmp ne (void ()* @_ZTHN1VIcE1mE, // AIX-NOT: br i1 icmp ne (void ()* @_ZTHN1VIcE1mE // LINUX_AIX: call void @_ZTHN1VIcE1mE() -// LINUX_AIX: ret i32* @_ZN1VIcE1mE +// LINUX_AIX: [[VEM_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN1VIcE1mE) +// LINUX_AIX: ret i32* [[VEM_ADDR]] // DARWIN: declare cxx_fast_tlscc noundef i32* @_ZTWN1WIcE1mE() // LINUX_AIX: define linkonce_odr hidden noundef i32* @_ZTWN1WIcE1mE() {{#[0-9]+}}{{( comdat)?}} { // LINUX: br i1 icmp ne (void ()* @_ZTHN1WIcE1mE, // AIX-NOT: br i1 icmp ne (void ()* @_ZTHN1WIcE1mE, // LINUX_AIX: call void @_ZTHN1WIcE1mE() -// LINUX_AIX: ret i32* @_ZN1WIcE1mE +// LINUX_AIX: [[WEM_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN1WIcE1mE) +// LINUX_AIX: ret i32* [[WEM_ADDR]] // DARWIN: declare cxx_fast_tlscc {{.*}}* @_ZTWN1XIcE1mE() // LINUX_AIX: define linkonce_odr hidden {{.*}}* @_ZTWN1XIcE1mE() {{#[0-9]+}}{{( comdat)?}} { // LINUX: br i1 icmp ne (void ()* @_ZTHN1XIcE1mE, // AIX-NOT: br i1 icmp ne (void ()* @_ZTHN1XIcE1mE, // LINUX_AIX: call void @_ZTHN1XIcE1mE() -// LINUX_AIX: ret {{.*}}* @_ZN1XIcE1mE +// LINUX_AIX: [[XEM_ADDR:%.+]] = call %struct.Dtor* @llvm.threadlocal.address.p0s_struct.Dtors(%struct.Dtor* @_ZN1XIcE1mE) +// LINUX_AIX: ret {{.*}}* [[XEM_ADDR]] struct S { S(); ~S(); }; struct T { ~T(); }; @@ -317,7 +330,8 @@ // CHECK: br i1 %[[V_M_INITIALIZED]], // need init: // CHECK: call{{.*}} i32 @_Z1gv() -// CHECK: store i32 %{{.*}}, i32* @_ZN1VIiE1mE, align 4 +// CHECK: [[VEM_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN1VIiE1mE) +// CHECK: store i32 %{{.*}}, i32* [[VEM_ADDR]], align 4 // CHECK: store i8 1, i8* bitcast (i64* @_ZGVN1VIiE1mE to i8*) // CHECK: br label @@ -359,7 +373,8 @@ // DARWIN: define cxx_fast_tlscc noundef i32* @_ZTW1a() // LINUX_AIX: call void @_ZTH1a() // DARWIN: call cxx_fast_tlscc void @_ZTH1a() -// CHECK: ret i32* @a +// CHECK: [[A_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @a) +// CHECK: ret i32* [[A_ADDR]] // CHECK: } @@ -372,7 +387,8 @@ // DARWIN-LABEL: define cxx_fast_tlscc noundef i32* @_ZTWN1U1mE() // LINUX_AIX: call void @_ZTHN1U1mE() // DARWIN: call cxx_fast_tlscc void @_ZTHN1U1mE() -// CHECK: ret i32* @_ZN1U1mE +// CHECK: [[UM_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN1U1mE) +// CHECK: ret i32* [[UM_ADDR]] // LINUX_AIX: declare extern_weak void @_ZTH1b() [[ATTR:#[0-9]+]] Index: clang/test/CodeGenCXX/cxx1y-variable-template.cpp =================================================================== --- clang/test/CodeGenCXX/cxx1y-variable-template.cpp +++ clang/test/CodeGenCXX/cxx1y-variable-template.cpp @@ -41,7 +41,8 @@ // CHECK: icmp eq i8 {{.*}}, 0 // CHECK: br i1 // CHECK: call noundef i32 @_ZN7PR421111fEv( - // CHECK: store i32 {{.*}}, i32* @_ZN7PR4211112_GLOBAL__N_11nILi0EEE + // CHECK: [[N_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN7PR4211112_GLOBAL__N_11nILi0EEE) + // CHECK: store i32 {{.*}}, i32* [[N_ADDR]] // CHECK: store i8 1, i8* @_ZGVN7PR4211112_GLOBAL__N_11nILi0EEE int g() { return n<> + n<>; } } Index: clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp =================================================================== --- clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp +++ clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp @@ -31,7 +31,8 @@ // CHECK-LABEL: define{{.*}} i32 @_Z5get_bv() // CHECK-NOT: call -// CHECK: load i32, i32* @b +// CHECK: [[B_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @b) +// CHECK: load i32, i32* [[B_ADDR]] // CHECK-NOT: call // CHECK: } int get_b() { return b; } @@ -52,7 +53,8 @@ // LINUX-LABEL: define weak_odr {{.*}} @_ZTW1c() // CHECK-NOT: br i1 // CHECK-NOT: call -// CHECK: ret i32* @c +// CHECK: [[C_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @c) +// CHECK: ret i32* [[C_ADDR]] // CHECK: } thread_local int c = 0; Index: clang/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp =================================================================== --- clang/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp +++ clang/test/CodeGenCXX/microsoft-abi-thread-safe-statics.cpp @@ -38,7 +38,8 @@ // CHECK-NEXT: br label %[[init_end:.*]] // CHECK: [[init_end]]: -// CHECK-NEXT: ret %struct.S* @"?s@?1??f@@YAAAUS@@XZ@4U2@A" +// CHECK: [[S_ADDR:%.+]] = call %struct.S* @llvm.threadlocal.address.p0s_struct.Ss(%struct.S* @"?s@?1??f@@YAAAUS@@XZ@4U2@A") +// CHECK-NEXT: ret %struct.S* [[S_ADDR]] // CHECK: [[lpad:.*]]: // CHECK-NEXT: cleanuppad within none [] Index: clang/test/CodeGenCXX/pr18635.cpp =================================================================== --- clang/test/CodeGenCXX/pr18635.cpp +++ clang/test/CodeGenCXX/pr18635.cpp @@ -4,7 +4,9 @@ // CHECK: [[X_GLOBAL:@[^ ]+]]{{.*}}thread_local global // returned somewhere in TLS wrapper: -// CHECK: ret{{.*}}[[X_GLOBAL]] +// CHECK: define {{.+}} ptr @_ZTW1x( +// CHECK: [[X_GLOBAL_ADDR:%[^ ]+]] = call ptr @llvm.threadlocal.address.p0(ptr [[X_GLOBAL]]) +// CHECK: ret{{.*}}[[X_GLOBAL_ADDR]] template class unique_ptr { template struct pair { @@ -19,4 +21,3 @@ thread_local unique_ptr x; int main() { x = unique_ptr(new int(5)); } - Index: clang/test/CodeGenCXX/threadlocal_address.cpp =================================================================== --- /dev/null +++ clang/test/CodeGenCXX/threadlocal_address.cpp @@ -0,0 +1,75 @@ +// Test that the use of thread local variables would be wrapped by @llvm.threadlocal.address intrinsics. +// RUN: %clang_cc1 -std=c++11 -emit-llvm -triple %itanium_abi_triple -o - %s -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -std=c++11 -emit-llvm -triple %itanium_abi_triple -o - -O1 %s | FileCheck %s -check-prefix=CHECK-O1 +// RUN: %clang_cc1 -std=c++11 -no-opaque-pointers -emit-llvm -triple %itanium_abi_triple -o - %s -disable-llvm-passes | FileCheck %s -check-prefix=CHECK-NOOPAQUE +thread_local int i; +int g() { + i++; + return i; +} +// CHECK: @i = thread_local global i32 0 +// CHECK: @_ZZ1fvE1j = internal thread_local global i32 0 +// +// CHECK: @_Z1gv() +// CHECK-NEXT: entry +// CHECK-NEXT: %[[IA:.+]] = call ptr @llvm.threadlocal.address.p0(ptr @i) +// CHECK-NEXT: %[[VA:.+]] = load i32, ptr %[[IA]] +// CHECK-NEXT: %[[INC:.+]] = add nsw i32 %[[VA]], 1 +// CHECK-NEXT: store i32 %[[INC]], ptr %[[IA]], align 4 +// CHECK-NEXT: %[[IA2:.+]] = call ptr @llvm.threadlocal.address.p0(ptr @i) +// CHECK-NEXT: %[[RET:.+]] = load i32, ptr %[[IA2]], align 4 +// CHECK-NEXT: ret i32 %[[RET]] +// +// CHECK: declare ptr @llvm.threadlocal.address.p0(ptr) #[[ATTR_NUM:.+]] +// +// CHECK-O1-LABEL: @_Z1gv +// CHECK-O1-NEXT: entry: +// CHECK-O1-NEXT: %[[I_ADDR:.+]] = call ptr @llvm.threadlocal.address.p0(ptr nonnull @i) +// CHECK-O1-NEXT: %[[VAL:.+]] = load i32, ptr %[[I_ADDR]] +// CHECK-O1-NEXT: %[[INC:.+]] = add nsw i32 %[[VAL]], 1 +// CHECK-O1-NEXT: store i32 %[[INC]], ptr %[[I_ADDR]] +// CHECK-O1-NEXT: ret i32 %[[INC]] +// +// CHECK-NOOPAQUE-LABEL: @_Z1gv +// CHECK-NOOPAQUE-NEXT: entry: +// CHECK-NOOPAQUE-NEXT: %[[I_ADDR:.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @i) +// CHECK-NOOPAQUE-NEXT: %[[VAL:.+]] = load i32, i32* %[[I_ADDR]] +// CHECK-NOOPAQUE-NEXT: %[[INC:.+]] = add nsw i32 %[[VAL]], 1 +// CHECK-NOOPAQUE-NEXT: store i32 %[[INC]], i32* %[[I_ADDR]] +// CHECK-NOOPAQUE-NEXT: %[[IA2:.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @i) +// CHECK-NOOPAQUE-NEXT: %[[RET:.+]] = load i32, i32* %[[IA2]], align 4 +// CHECK-NOOPAQUE-NEXT: ret i32 %[[RET]] +int f() { + thread_local int j = 0; + j++; + return j; +} +// CHECK: @_Z1fv() +// CHECK-NEXT: entry +// CHECK-NEXT: %[[JA:.+]] = call ptr @llvm.threadlocal.address.p0(ptr @_ZZ1fvE1j) +// CHECK-NEXT: %[[VA:.+]] = load i32, ptr %[[JA]] +// CHECK-NEXT: %[[INC:.+]] = add nsw i32 %[[VA]], 1 +// CHECK-NEXT: store i32 %[[INC]], ptr %[[JA]], align 4 +// CHECK-NEXT: %[[JA2:.+]] = call ptr @llvm.threadlocal.address.p0(ptr @_ZZ1fvE1j) +// CHECK-NEXT: %[[RET:.+]] = load i32, ptr %[[JA2]], align 4 +// CHECK-NEXT: ret i32 %[[RET]] +// +// CHECK-O1-LABEL: @_Z1fv +// CHECK-O1-NEXT: entry: +// CHECK-O1-NEXT: %[[J_ADDR:.+]] = call ptr @llvm.threadlocal.address.p0(ptr nonnull @_ZZ1fvE1j) +// CHECK-O1-NEXT: %[[VAL:.+]] = load i32, ptr %[[J_ADDR]] +// CHECK-O1-NEXT: %[[INC:.+]] = add nsw i32 %[[VAL]], 1 +// CHECK-O1-NEXT: store i32 %[[INC]], ptr %[[J_ADDR]] +// CHECK-O1-NEXT: ret i32 %[[INC]] +// +// CHECK-NOOPAQUE: @_Z1fv() +// CHECK-NOOPAQUE-NEXT: entry +// CHECK-NOOPAQUE-NEXT: %[[JA:.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ1fvE1j) +// CHECK-NOOPAQUE-NEXT: %[[VA:.+]] = load i32, i32* %[[JA]] +// CHECK-NOOPAQUE-NEXT: %[[INC:.+]] = add nsw i32 %[[VA]], 1 +// CHECK-NOOPAQUE-NEXT: store i32 %[[INC]], i32* %[[JA]], align 4 +// CHECK-NOOPAQUE-NEXT: %[[JA2:.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ1fvE1j) +// CHECK-NOOPAQUE-NEXT: %[[RET:.+]] = load i32, i32* %[[JA2]], align 4 +// CHECK-NOOPAQUE-NEXT: ret i32 %[[RET]] +// +// CHECK: attributes #[[ATTR_NUM]] = { nocallback nofree nosync nounwind readnone speculatable willreturn } Index: clang/test/Modules/initializers.cpp =================================================================== --- clang/test/Modules/initializers.cpp +++ clang/test/Modules/initializers.cpp @@ -174,11 +174,13 @@ // CHECK: define {{.*}} @[[G_INIT:__cxx_global.*]]() // CHECK: load {{.*}} (i64* @_ZGV -// CHECK: store {{.*}}, i32* @[[G]], +// CHECK: [[G_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @[[G]]) +// CHECK: store {{.*}}, i32* [[G_ADDR]] // CHECK: define {{.*}} @[[H_INIT:__cxx_global.*]]() // CHECK: load {{.*}} (i64* @_ZGV -// CHECK: store {{.*}}, i32* @[[H]], +// CHECK: [[H_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @[[H]]) +// CHECK: store {{.*}}, i32* [[H_ADDR]], // FIXME: Should this use __cxa_guard_acquire? // CHECK: define {{.*}} @[[XA_INIT]]() @@ -187,7 +189,8 @@ // CHECK: define {{.*}} @[[XC_INIT:__cxx_global.*]]() // CHECK: load {{.*}} (i64* @_ZGV -// CHECK: store {{.*}}, i32* @[[XC]], +// CHECK: [[XC_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @[[XC]]) +// CHECK: store {{.*}}, i32* [[XC_ADDR]], // FIXME: Should this use __cxa_guard_acquire? // CHECK: define {{.*}} @[[XE_INIT]]() @@ -196,11 +199,13 @@ // CHECK: define {{.*}} @[[XG_INIT:__cxx_global.*]]() // CHECK: load {{.*}} (i64* @_ZGV -// CHECK: store {{.*}}, i32* @[[XG]], +// CHECK: [[XG_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @[[XG]]) +// CHECK: store {{.*}}, i32* [[XG_ADDR]], // CHECK: define {{.*}} @[[XH_INIT:__cxx_global.*]]() // CHECK: load {{.*}} (i64* @_ZGV -// CHECK: store {{.*}}, i32* @[[XH]], +// CHECK: [[XH_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @[[XH]]) +// CHECK: store {{.*}}, i32* [[XH_ADDR]], // FIXME: Should this use __cxa_guard_acquire? // CHECK: define {{.*}} @[[XF_INIT]]() @@ -209,7 +214,8 @@ // CHECK: define {{.*}} @[[XD_INIT:__cxx_global.*]]() // CHECK: load {{.*}} (i64* @_ZGV -// CHECK: store {{.*}}, i32* @[[XD]], +// CHECK: [[XD_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @[[XD]]) +// CHECK: store {{.*}}, i32* [[XD_ADDR]], // FIXME: Should this use __cxa_guard_acquire? // CHECK: define {{.*}} @[[XB_INIT]]() @@ -226,11 +232,13 @@ // CHECK-IMPORT: define {{.*}} @[[C_INIT:__cxx_global.*]]() // CHECK-IMPORT: call noundef i32 @_Z11non_trivialv( -// CHECK-IMPORT: store {{.*}}, i32* @[[C]], +// CHECK-IMPORT: [[C_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @[[C]]) +// CHECK-IMPORT: store {{.*}}, i32* [[C_ADDR]], // CHECK-IMPORT: define {{.*}} @[[D_INIT:__cxx_global.*]]() // CHECK-IMPORT: load {{.*}} (i64* @_ZGV -// CHECK-IMPORT: store {{.*}}, i32* @[[D]], +// CHECK-IMPORT: [[D_ADDR:%.+]] = call i32* @llvm.threadlocal.address.p0i32(i32* @[[D]]) +// CHECK-IMPORT: store {{.*}}, i32* [[D_ADDR]], // CHECK-IMPORT: define {{.*}} @[[TU_INIT]]() Index: clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp +++ clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp @@ -49,7 +49,7 @@ // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l14(i64 [[TMP1]], i8** [[TMP2]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l14(i64 [[TMP1]], i8** [[TMP2]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: ret i32 0 // // @@ -463,15 +463,16 @@ // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP3]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP3:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: ret void @@ -484,32 +485,33 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[TMP2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP3]], [[TMP5]] +// CHECK1-NEXT: [[TMP2:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[TMP4]], i64 [[TMP3]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP4]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP3]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV3]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done4: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_privates_map. -// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8***, align 8 @@ -562,14 +564,14 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* -// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] +// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* -// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR6]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 @@ -590,9 +592,9 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 Index: clang/test/OpenMP/for_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/for_reduction_task_codegen.cpp +++ clang/test/OpenMP/for_reduction_task_codegen.cpp @@ -372,15 +372,16 @@ // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP3]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP3:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: ret void @@ -393,32 +394,33 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[TMP2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP3]], [[TMP5]] +// CHECK1-NEXT: [[TMP2:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[TMP4]], i64 [[TMP3]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP4]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP3]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV3]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done4: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_privates_map. -// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8***, align 8 @@ -471,14 +473,14 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* -// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* -// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR6]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 @@ -499,9 +501,9 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 Index: clang/test/OpenMP/parallel_copyin_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_copyin_codegen.cpp +++ clang/test/OpenMP/parallel_copyin_codegen.cpp @@ -1106,13 +1106,18 @@ // CHECK11-NEXT: store i8 1, i8* @_ZGVZ4mainE3var, align 1 // CHECK11-NEXT: br label [[INIT_END3]] // CHECK11: init.end3: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* @_ZZ4mainE5t_var, [2 x i32]* @_ZZ4mainE3vec, [2 x %struct.S]* @_ZZ4mainE5s_arr, %struct.S* @_ZZ4mainE3var) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP4:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP5:%.*]] = call [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* @_ZZ4mainE3vec) +// CHECK11-NEXT: [[TMP6:%.*]] = call [2 x %struct.S]* @llvm.threadlocal.address.p0a2s_struct.Ss([2 x %struct.S]* @_ZZ4mainE5s_arr) +// CHECK11-NEXT: [[TMP7:%.*]] = call %struct.S* @llvm.threadlocal.address.p0s_struct.Ss(%struct.S* @_ZZ4mainE3var) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S]*, %struct.S*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP4]], [2 x i32]* [[TMP5]], [2 x %struct.S]* [[TMP6]], %struct.S* [[TMP7]]) +// CHECK11-NEXT: [[TMP8:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ4mainE5t_var) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32* [[TMP8]]) // CHECK11-NEXT: [[CALL4:%.*]] = call noundef i32 @_Z5tmainIiET_v() // CHECK11-NEXT: store i32 [[CALL4]], i32* [[RETVAL]], align 4 // CHECK11-NEXT: call void @_ZN1SIfED1Ev(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP4]] +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP9]] // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC1Ev @@ -1194,34 +1199,49 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 // CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 // CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP0]] to i64 -// CHECK11-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], ptrtoint (i32* @_ZZ4mainE5t_var to i64) -// CHECK11-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK11-NEXT: [[TMP4:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[TMP0]] to i64 +// CHECK11-NEXT: [[TMP6:%.*]] = ptrtoint i32* [[TMP4]] to i64 +// CHECK11-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: br i1 [[TMP7]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK11: copyin.not.master: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* @_ZZ4mainE5t_var, align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast ([2 x i32]* @_ZZ4mainE3vec to i8*), i8* align 4 [[TMP7]], i64 8, i1 false) -// CHECK11-NEXT: [[TMP8:%.*]] = bitcast [2 x %struct.S]* [[TMP2]] to %struct.S* -// CHECK11-NEXT: br i1 icmp eq (%struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), %struct.S* getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i64 1, i64 0)), label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = call [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* @_ZZ4mainE3vec) +// CHECK11-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP9]] to i8* +// CHECK11-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i64 8, i1 false) +// CHECK11-NEXT: [[TMP12:%.*]] = call [2 x %struct.S]* @llvm.threadlocal.address.p0a2s_struct.Ss([2 x %struct.S]* @_ZZ4mainE5s_arr) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP12]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP2]] to %struct.S* +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP8]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i32 0, i32 0), [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S:%.*]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i64 1, i64 0) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] // CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK11: omp.arraycpy.done1: -// CHECK11-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE3var, %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]]) +// CHECK11-NEXT: [[TMP15:%.*]] = call %struct.S* @llvm.threadlocal.address.p0s_struct.Ss(%struct.S* @_ZZ4mainE3var) +// CHECK11-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP15]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP3]]) // CHECK11-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK11: copyin.not.master.end: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP10]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* @_ZZ4mainE5t_var, align 4 -// CHECK11-NEXT: store i32 [[TMP11]], i32* getelementptr inbounds ([2 x i32], [2 x i32]* @_ZZ4mainE3vec, i64 0, i64 0), align 4 -// CHECK11-NEXT: [[CALL3:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S], [2 x %struct.S]* @_ZZ4mainE5s_arr, i64 0, i64 0), %struct.S* noundef nonnull align 4 dereferenceable(4) @_ZZ4mainE3var) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP17]]) +// CHECK11-NEXT: [[TMP18:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = call [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* @_ZZ4mainE3vec) +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP20]], i64 0, i64 0 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = call %struct.S* @llvm.threadlocal.address.p0s_struct.Ss(%struct.S* @_ZZ4mainE3var) +// CHECK11-NEXT: [[TMP22:%.*]] = call [2 x %struct.S]* @llvm.threadlocal.address.p0a2s_struct.Ss([2 x %struct.S]* @_ZZ4mainE5s_arr) +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP22]], i64 0, i64 0 +// CHECK11-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S* @_ZN1SIfEaSERKS0_(%struct.S* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], %struct.S* noundef nonnull align 4 dereferenceable(4) [[TMP21]]) // CHECK11-NEXT: ret void // // @@ -1235,20 +1255,23 @@ // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 // CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = ptrtoint i32* [[TMP0]] to i64 -// CHECK11-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], ptrtoint (i32* @_ZZ4mainE5t_var to i64) -// CHECK11-NEXT: br i1 [[TMP2]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK11-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP2:%.*]] = ptrtoint i32* [[TMP0]] to i64 +// CHECK11-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP1]] to i64 +// CHECK11-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] +// CHECK11-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK11: copyin.not.master: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* @_ZZ4mainE5t_var, align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[TMP1]], align 4 // CHECK11-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK11: copyin.not.master.end: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* @_ZZ4mainE5t_var, align 4 -// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 -// CHECK11-NEXT: store i32 [[INC]], i32* @_ZZ4mainE5t_var, align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP8:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ4mainE5t_var) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK11-NEXT: [[INC:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK11-NEXT: store i32 [[INC]], i32* [[TMP8]], align 4 // CHECK11-NEXT: ret void // // @@ -1280,8 +1303,13 @@ // CHECK11-NEXT: store i8 1, i8* bitcast (i64* @_ZGVZ5tmainIiET_vE3var to i8*), align 8 // CHECK11-NEXT: br label [[INIT_END3]] // CHECK11: init.end3: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* @_ZZ5tmainIiET_vE5t_var, [2 x i32]* @_ZZ5tmainIiET_vE3vec, [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, %struct.S.0* @_ZZ5tmainIiET_vE3var) -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: [[TMP4:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: [[TMP5:%.*]] = call [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* @_ZZ5tmainIiET_vE3vec) +// CHECK11-NEXT: [[TMP6:%.*]] = call [2 x %struct.S.0]* @llvm.threadlocal.address.p0a2s_struct.S.0s([2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr) +// CHECK11-NEXT: [[TMP7:%.*]] = call %struct.S.0* @llvm.threadlocal.address.p0s_struct.S.0s(%struct.S.0* @_ZZ5tmainIiET_vE3var) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, [2 x i32]*, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32* [[TMP4]], [2 x i32]* [[TMP5]], [2 x %struct.S.0]* [[TMP6]], %struct.S.0* [[TMP7]]) +// CHECK11-NEXT: [[TMP8:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i32* [[TMP8]]) // CHECK11-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] // CHECK11-NEXT: ret i32 0 // @@ -1293,8 +1321,9 @@ // CHECK11-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 128 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP0]] to float +// CHECK11-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK11-NEXT: [[TMP1:%.*]] = load volatile i32, i32* [[TMP0]], align 128 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to float // CHECK11-NEXT: store float [[CONV]], float* [[F]], align 4 // CHECK11-NEXT: ret void // @@ -1318,8 +1347,9 @@ // CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load float, float* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 128 -// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to float +// CHECK11-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK11-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[TMP1]], align 128 +// CHECK11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP2]] to float // CHECK11-NEXT: [[ADD:%.*]] = fadd float [[TMP0]], [[CONV]] // CHECK11-NEXT: store float [[ADD]], float* [[F]], align 4 // CHECK11-NEXT: ret void @@ -1404,34 +1434,49 @@ // CHECK11-NEXT: [[TMP1:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 // CHECK11-NEXT: [[TMP2:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 // CHECK11-NEXT: [[TMP3:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP4:%.*]] = ptrtoint i32* [[TMP0]] to i64 -// CHECK11-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], ptrtoint (i32* @_ZZ5tmainIiET_vE5t_var to i64) -// CHECK11-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK11-NEXT: [[TMP4:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: [[TMP5:%.*]] = ptrtoint i32* [[TMP0]] to i64 +// CHECK11-NEXT: [[TMP6:%.*]] = ptrtoint i32* [[TMP4]] to i64 +// CHECK11-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP5]], [[TMP6]] +// CHECK11-NEXT: br i1 [[TMP7]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK11: copyin.not.master: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP6]], i32* @_ZZ5tmainIiET_vE5t_var, align 128 -// CHECK11-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 bitcast ([2 x i32]* @_ZZ5tmainIiET_vE3vec to i8*), i8* align 128 [[TMP7]], i64 8, i1 false) -// CHECK11-NEXT: [[TMP8:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* -// CHECK11-NEXT: br i1 icmp eq (%struct.S.0* getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), %struct.S.0* getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 1, i64 0)), label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 128 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[TMP4]], align 128 +// CHECK11-NEXT: [[TMP9:%.*]] = call [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* @_ZZ5tmainIiET_vE3vec) +// CHECK11-NEXT: [[TMP10:%.*]] = bitcast [2 x i32]* [[TMP9]] to i8* +// CHECK11-NEXT: [[TMP11:%.*]] = bitcast [2 x i32]* [[TMP1]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 128 [[TMP10]], i8* align 128 [[TMP11]], i64 8, i1 false) +// CHECK11-NEXT: [[TMP12:%.*]] = call [2 x %struct.S.0]* @llvm.threadlocal.address.p0a2s_struct.S.0s([2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP12]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.0]* [[TMP2]] to %struct.S.0* +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP8]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i32 0, i32 0), [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP13]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0:%.*]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 1, i64 0) +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] // CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK11: omp.arraycpy.done1: -// CHECK11-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var, %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP3]]) +// CHECK11-NEXT: [[TMP15:%.*]] = call %struct.S.0* @llvm.threadlocal.address.p0s_struct.S.0s(%struct.S.0* @_ZZ5tmainIiET_vE3var) +// CHECK11-NEXT: [[CALL2:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP15]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP3]]) // CHECK11-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK11: copyin.not.master.end: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP10]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* @_ZZ5tmainIiET_vE5t_var, align 128 -// CHECK11-NEXT: store i32 [[TMP11]], i32* getelementptr inbounds ([2 x i32], [2 x i32]* @_ZZ5tmainIiET_vE3vec, i64 0, i64 0), align 128 -// CHECK11-NEXT: [[CALL3:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x %struct.S.0], [2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr, i64 0, i64 0), %struct.S.0* noundef nonnull align 4 dereferenceable(4) @_ZZ5tmainIiET_vE3var) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]]) +// CHECK11-NEXT: [[TMP18:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 128 +// CHECK11-NEXT: [[TMP20:%.*]] = call [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* @_ZZ5tmainIiET_vE3vec) +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP20]], i64 0, i64 0 +// CHECK11-NEXT: store i32 [[TMP19]], i32* [[ARRAYIDX]], align 128 +// CHECK11-NEXT: [[TMP21:%.*]] = call %struct.S.0* @llvm.threadlocal.address.p0s_struct.S.0s(%struct.S.0* @_ZZ5tmainIiET_vE3var) +// CHECK11-NEXT: [[TMP22:%.*]] = call [2 x %struct.S.0]* @llvm.threadlocal.address.p0a2s_struct.S.0s([2 x %struct.S.0]* @_ZZ5tmainIiET_vE5s_arr) +// CHECK11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP22]], i64 0, i64 0 +// CHECK11-NEXT: [[CALL4:%.*]] = call noundef nonnull align 4 dereferenceable(4) %struct.S.0* @_ZN1SIiEaSERKS0_(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYIDX3]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP21]]) // CHECK11-NEXT: ret void // // @@ -1445,17 +1490,19 @@ // CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK11-NEXT: store i32* [[T_VAR]], i32** [[T_VAR_ADDR]], align 8 // CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[T_VAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = ptrtoint i32* [[TMP0]] to i64 -// CHECK11-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], ptrtoint (i32* @_ZZ5tmainIiET_vE5t_var to i64) -// CHECK11-NEXT: br i1 [[TMP2]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK11-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZZ5tmainIiET_vE5t_var) +// CHECK11-NEXT: [[TMP2:%.*]] = ptrtoint i32* [[TMP0]] to i64 +// CHECK11-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP1]] to i64 +// CHECK11-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] +// CHECK11-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK11: copyin.not.master: -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP3]], i32* @_ZZ5tmainIiET_vE5t_var, align 128 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 128 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[TMP1]], align 128 // CHECK11-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK11: copyin.not.master.end: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]]) +// CHECK11-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK11-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) // CHECK11-NEXT: ret void // // @@ -1466,8 +1513,9 @@ // CHECK11-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP0:%.*]] = load volatile i32, i32* @g, align 128 -// CHECK11-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK11-NEXT: [[TMP1:%.*]] = load volatile i32, i32* [[TMP0]], align 128 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[F]], align 4 // CHECK11-NEXT: ret void // // @@ -1490,15 +1538,17 @@ // CHECK11-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 // CHECK11-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load volatile i32, i32* @g, align 128 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]] +// CHECK11-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK11-NEXT: [[TMP2:%.*]] = load volatile i32, i32* [[TMP1]], align 128 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP2]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[F]], align 4 // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@_ZTW1g -// CHECK11-SAME: () #[[ATTR8:[0-9]+]] comdat { -// CHECK11-NEXT: ret i32* @g +// CHECK11-SAME: () #[[ATTR9:[0-9]+]] comdat { +// CHECK11-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK11-NEXT: ret i32* [[TMP1]] // // // CHECK13-LABEL: define {{[^@]+}}@main @@ -1522,25 +1572,29 @@ // CHECK13-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK13-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = ptrtoint i32* [[TMP0]] to i64 -// CHECK13-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], ptrtoint (i32* @g to i64) -// CHECK13-NEXT: br i1 [[TMP2]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK13-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK13-NEXT: [[TMP2:%.*]] = ptrtoint i32* [[TMP0]] to i64 +// CHECK13-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP1]] to i64 +// CHECK13-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] +// CHECK13-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK13: copyin.not.master: -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 128 -// CHECK13-NEXT: store volatile i32 [[TMP3]], i32* @g, align 128 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 128 +// CHECK13-NEXT: store volatile i32 [[TMP5]], i32* [[TMP1]], align 128 // CHECK13-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK13: copyin.not.master.end: -// CHECK13-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK13-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]]) -// CHECK13-NEXT: store volatile i32 1, i32* @g, align 128 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK13-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]]) +// CHECK13-NEXT: [[TMP8:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK13-NEXT: store volatile i32 1, i32* [[TMP8]], align 128 // CHECK13-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) // CHECK13-NEXT: ret void // // // CHECK13-LABEL: define {{[^@]+}}@_ZTW1g -// CHECK13-SAME: () #[[ATTR5:[0-9]+]] comdat { -// CHECK13-NEXT: ret i32* @g +// CHECK13-SAME: () #[[ATTR6:[0-9]+]] comdat { +// CHECK13-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK13-NEXT: ret i32* [[TMP1]] // // // CHECK14-LABEL: define {{[^@]+}}@main @@ -1562,7 +1616,8 @@ // CHECK14-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK14-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // CHECK14-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* @g) +// CHECK14-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]]) // CHECK14-NEXT: ret void // // @@ -1576,21 +1631,24 @@ // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK14-NEXT: store i32* [[G]], i32** [[G_ADDR]], align 8 // CHECK14-NEXT: [[TMP0:%.*]] = load i32*, i32** [[G_ADDR]], align 8 -// CHECK14-NEXT: [[TMP1:%.*]] = ptrtoint i32* [[TMP0]] to i64 -// CHECK14-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], ptrtoint (i32* @g to i64) -// CHECK14-NEXT: br i1 [[TMP2]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK14-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK14-NEXT: [[TMP2:%.*]] = ptrtoint i32* [[TMP0]] to i64 +// CHECK14-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP1]] to i64 +// CHECK14-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] +// CHECK14-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK14: copyin.not.master: -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 128 -// CHECK14-NEXT: store volatile i32 [[TMP3]], i32* @g, align 128 +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 128 +// CHECK14-NEXT: store volatile i32 [[TMP5]], i32* [[TMP1]], align 128 // CHECK14-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK14: copyin.not.master.end: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK14-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]]) -// CHECK14-NEXT: store volatile i32 1, i32* @g, align 128 -// CHECK14-NEXT: [[TMP6:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global.2 to %struct.__block_literal_generic*), i32 0, i32 3), align 8 -// CHECK14-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to void (i8*)* -// CHECK14-NEXT: call void [[TMP7]](i8* noundef bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global.2 to i8*)) +// CHECK14-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK14-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]]) +// CHECK14-NEXT: [[TMP8:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK14-NEXT: store volatile i32 1, i32* [[TMP8]], align 128 +// CHECK14-NEXT: [[TMP9:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global.2 to %struct.__block_literal_generic*), i32 0, i32 3), align 8 +// CHECK14-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to void (i8*)* +// CHECK14-NEXT: call void [[TMP10]](i8* noundef bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global.2 to i8*)) // CHECK14-NEXT: ret void // // @@ -1602,13 +1660,15 @@ // CHECK14-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK14-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* // CHECK14-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 -// CHECK14-NEXT: store volatile i32 2, i32* @g, align 128 +// CHECK14-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK14-NEXT: store volatile i32 2, i32* [[TMP0]], align 128 // CHECK14-NEXT: ret void // // // CHECK14-LABEL: define {{[^@]+}}@_ZTW1g -// CHECK14-SAME: () #[[ATTR6:[0-9]+]] comdat { -// CHECK14-NEXT: ret i32* @g +// CHECK14-SAME: () #[[ATTR7:[0-9]+]] comdat { +// CHECK14-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @g) +// CHECK14-NEXT: ret i32* [[TMP1]] // // // CHECK15-LABEL: define {{[^@]+}}@_Z10array_funcv @@ -1630,7 +1690,9 @@ // CHECK15-NEXT: store i8 1, i8* @_ZGVZ10array_funcvE1s, align 1 // CHECK15-NEXT: br label [[INIT_END]] // CHECK15: init.end: -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, [2 x %struct.St]*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* @_ZZ10array_funcvE1a, [2 x %struct.St]* @_ZZ10array_funcvE1s) +// CHECK15-NEXT: [[TMP2:%.*]] = call [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* @_ZZ10array_funcvE1a) +// CHECK15-NEXT: [[TMP3:%.*]] = call [2 x %struct.St]* @llvm.threadlocal.address.p0a2s_struct.Sts([2 x %struct.St]* @_ZZ10array_funcvE1s) +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, [2 x %struct.St]*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[TMP2]], [2 x %struct.St]* [[TMP3]]) // CHECK15-NEXT: ret void // // @@ -1683,28 +1745,35 @@ // CHECK15-NEXT: store [2 x %struct.St]* [[S]], [2 x %struct.St]** [[S_ADDR]], align 8 // CHECK15-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[A_ADDR]], align 8 // CHECK15-NEXT: [[TMP1:%.*]] = load [2 x %struct.St]*, [2 x %struct.St]** [[S_ADDR]], align 8 -// CHECK15-NEXT: [[TMP2:%.*]] = ptrtoint [2 x i32]* [[TMP0]] to i64 -// CHECK15-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], ptrtoint ([2 x i32]* @_ZZ10array_funcvE1a to i64) -// CHECK15-NEXT: br i1 [[TMP3]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK15-NEXT: [[TMP2:%.*]] = call [2 x i32]* @llvm.threadlocal.address.p0a2i32([2 x i32]* @_ZZ10array_funcvE1a) +// CHECK15-NEXT: [[TMP3:%.*]] = ptrtoint [2 x i32]* [[TMP0]] to i64 +// CHECK15-NEXT: [[TMP4:%.*]] = ptrtoint [2 x i32]* [[TMP2]] to i64 +// CHECK15-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP3]], [[TMP4]] +// CHECK15-NEXT: br i1 [[TMP5]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK15: copyin.not.master: -// CHECK15-NEXT: [[TMP4:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* -// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 bitcast ([2 x i32]* @_ZZ10array_funcvE1a to i8*), i8* align 4 [[TMP4]], i64 8, i1 false) -// CHECK15-NEXT: [[TMP5:%.*]] = bitcast [2 x %struct.St]* [[TMP1]] to %struct.St* -// CHECK15-NEXT: br i1 icmp eq (%struct.St* getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i32 0, i32 0), %struct.St* getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i64 1, i64 0)), label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK15-NEXT: [[TMP6:%.*]] = bitcast [2 x i32]* [[TMP2]] to i8* +// CHECK15-NEXT: [[TMP7:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* +// CHECK15-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP6]], i8* align 4 [[TMP7]], i64 8, i1 false) +// CHECK15-NEXT: [[TMP8:%.*]] = call [2 x %struct.St]* @llvm.threadlocal.address.p0a2s_struct.Sts([2 x %struct.St]* @_ZZ10array_funcvE1s) +// CHECK15-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.St], [2 x %struct.St]* [[TMP8]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP9:%.*]] = bitcast [2 x %struct.St]* [[TMP1]] to %struct.St* +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr [[STRUCT_ST:%.*]], %struct.St* [[ARRAY_BEGIN]], i64 2 +// CHECK15-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.St* [[ARRAY_BEGIN]], [[TMP10]] +// CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK15: omp.arraycpy.body: -// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.St* [ [[TMP5]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.St* [ getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i32 0, i32 0), [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.St* [ [[TMP9]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK15-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.St* [ [[ARRAY_BEGIN]], [[COPYIN_NOT_MASTER]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK15-NEXT: [[CALL:%.*]] = call noundef nonnull align 4 dereferenceable(8) %struct.St* @_ZN2StaSERKS_(%struct.St* noundef nonnull align 4 dereferenceable(8) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.St* noundef nonnull align 4 dereferenceable(8) [[OMP_ARRAYCPY_SRCELEMENTPAST]]) -// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_ST:%.*]], %struct.St* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK15-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_ST]], %struct.St* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK15-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_ST]], %struct.St* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.St* [[OMP_ARRAYCPY_DEST_ELEMENT]], getelementptr inbounds ([2 x %struct.St], [2 x %struct.St]* @_ZZ10array_funcvE1s, i64 1, i64 0) +// CHECK15-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.St* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP10]] // CHECK15-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] // CHECK15: omp.arraycpy.done1: // CHECK15-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK15: copyin.not.master.end: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK15-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 +// CHECK15-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]]) // CHECK15-NEXT: ret void // // @@ -1745,19 +1814,20 @@ // CHECK16-SAME: () #[[ATTR0:[0-9]+]] section ".text.startup" { // CHECK16-NEXT: entry: // CHECK16-NEXT: [[CALL:%.*]] = call noundef i32 @_Z6t_initv() -// CHECK16-NEXT: store i32 [[CALL]], i32* @t, align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @t) +// CHECK16-NEXT: store i32 [[CALL]], i32* [[TMP0]], align 4 // CHECK16-NEXT: ret void // // // CHECK16-LABEL: define {{[^@]+}}@_Z3foov -// CHECK16-SAME: () #[[ATTR2:[0-9]+]] { +// CHECK16-SAME: () #[[ATTR3:[0-9]+]] { // CHECK16-NEXT: entry: // CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*)) // CHECK16-NEXT: ret void // // // CHECK16-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR4:[0-9]+]] { // CHECK16-NEXT: entry: // CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1769,7 +1839,7 @@ // // // CHECK16-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T:%.*]]) #[[ATTR3]] { +// CHECK16-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[T:%.*]]) #[[ATTR4]] { // CHECK16-NEXT: entry: // CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -1799,9 +1869,10 @@ // // // CHECK16-LABEL: define {{[^@]+}}@_ZTW1t -// CHECK16-SAME: () #[[ATTR4:[0-9]+]] comdat { +// CHECK16-SAME: () #[[ATTR5:[0-9]+]] comdat { // CHECK16-NEXT: call void @_ZTH1t() -// CHECK16-NEXT: ret i32* @t +// CHECK16-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @t) +// CHECK16-NEXT: ret i32* [[TMP1]] // // // CHECK16-LABEL: define {{[^@]+}}@__tls_init Index: clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp +++ clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp @@ -367,15 +367,16 @@ // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP3]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP3:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: ret void @@ -388,32 +389,33 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[TMP2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP3]], [[TMP5]] +// CHECK1-NEXT: [[TMP2:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[TMP4]], i64 [[TMP3]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP4]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP3]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV3]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done4: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_privates_map. -// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8***, align 8 @@ -466,14 +468,14 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* -// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* -// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR6]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 @@ -494,9 +496,9 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 Index: clang/test/OpenMP/parallel_master_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_master_codegen.cpp +++ clang/test/OpenMP/parallel_master_codegen.cpp @@ -629,7 +629,8 @@ // CHECK29-LABEL: define {{[^@]+}}@_Z22parallel_master_copyinv // CHECK29-SAME: () #[[ATTR0:[0-9]+]] { // CHECK29-NEXT: entry: -// CHECK29-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* @a) +// CHECK29-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @a) +// CHECK29-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]]) // CHECK29-NEXT: ret void // // @@ -643,33 +644,37 @@ // CHECK29-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK29-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK29-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK29-NEXT: [[TMP1:%.*]] = ptrtoint i32* [[TMP0]] to i64 -// CHECK29-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], ptrtoint (i32* @a to i64) -// CHECK29-NEXT: br i1 [[TMP2]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] +// CHECK29-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @a) +// CHECK29-NEXT: [[TMP2:%.*]] = ptrtoint i32* [[TMP0]] to i64 +// CHECK29-NEXT: [[TMP3:%.*]] = ptrtoint i32* [[TMP1]] to i64 +// CHECK29-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP2]], [[TMP3]] +// CHECK29-NEXT: br i1 [[TMP4]], label [[COPYIN_NOT_MASTER:%.*]], label [[COPYIN_NOT_MASTER_END:%.*]] // CHECK29: copyin.not.master: -// CHECK29-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK29-NEXT: store i32 [[TMP3]], i32* @a, align 4 +// CHECK29-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK29-NEXT: store i32 [[TMP5]], i32* [[TMP1]], align 4 // CHECK29-NEXT: br label [[COPYIN_NOT_MASTER_END]] // CHECK29: copyin.not.master.end: -// CHECK29-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK29-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK29-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP5]]) // CHECK29-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK29-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK29-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) -// CHECK29-NEXT: [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0 -// CHECK29-NEXT: br i1 [[TMP9]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK29-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]]) +// CHECK29-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK29-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK29-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]]) +// CHECK29-NEXT: [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0 +// CHECK29-NEXT: br i1 [[TMP11]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] // CHECK29: omp_if.then: -// CHECK29-NEXT: [[TMP10:%.*]] = load i32, i32* @a, align 4 -// CHECK29-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK29-NEXT: store i32 [[INC]], i32* @a, align 4 -// CHECK29-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) +// CHECK29-NEXT: [[TMP12:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @a) +// CHECK29-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK29-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK29-NEXT: store i32 [[INC]], i32* [[TMP12]], align 4 +// CHECK29-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB2]], i32 [[TMP9]]) // CHECK29-NEXT: br label [[OMP_IF_END]] // CHECK29: omp_if.end: // CHECK29-NEXT: ret void // // // CHECK29-LABEL: define {{[^@]+}}@_ZTW1a -// CHECK29-SAME: () #[[ATTR4:[0-9]+]] comdat { -// CHECK29-NEXT: ret i32* @a +// CHECK29-SAME: () #[[ATTR5:[0-9]+]] comdat { +// CHECK29-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @a) +// CHECK29-NEXT: ret i32* [[TMP1]] // Index: clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp +++ clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp @@ -322,15 +322,16 @@ // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP3]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP3:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: ret void @@ -343,32 +344,33 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[TMP2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP3]], [[TMP5]] +// CHECK1-NEXT: [[TMP2:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[TMP4]], i64 [[TMP3]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP4]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP3]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV3]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done4: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_privates_map. -// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8***, align 8 @@ -421,14 +423,14 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* -// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* -// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR6]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 @@ -449,9 +451,9 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 Index: clang/test/OpenMP/parallel_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_reduction_task_codegen.cpp +++ clang/test/OpenMP/parallel_reduction_task_codegen.cpp @@ -313,15 +313,16 @@ // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP3]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP3:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: ret void @@ -334,32 +335,33 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[TMP2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP3]], [[TMP5]] +// CHECK1-NEXT: [[TMP2:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[TMP4]], i64 [[TMP3]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP4]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP3]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV3]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done4: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_privates_map. -// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8***, align 8 @@ -412,14 +414,14 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* -// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* -// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR6]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 @@ -440,9 +442,9 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 Index: clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp +++ clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp @@ -355,15 +355,16 @@ // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP3]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP3:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: ret void @@ -376,32 +377,33 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[TMP2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP3]], [[TMP5]] +// CHECK1-NEXT: [[TMP2:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[TMP4]], i64 [[TMP3]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP4]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP3]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV3]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done4: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_privates_map. -// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8***, align 8 @@ -454,14 +456,14 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* -// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* -// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR6]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 @@ -482,9 +484,9 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 Index: clang/test/OpenMP/reduction_implicit_map.cpp =================================================================== --- clang/test/OpenMP/reduction_implicit_map.cpp +++ clang/test/OpenMP/reduction_implicit_map.cpp @@ -693,15 +693,16 @@ // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = bitcast i8** [[DOTADDR]] to double** // CHECK1-NEXT: [[TMP3:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr double, double* [[TMP3]], i64 [[TMP4]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq double* [[TMP3]], [[TMP5]] +// CHECK1-NEXT: [[TMP4:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr double, double* [[TMP3]], i64 [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq double* [[TMP3]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi double* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store double 0.000000e+00, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: ret void @@ -714,24 +715,25 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[DOTADDR]] to double** -// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8** [[DOTADDR1]] to double** -// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr double, double* [[TMP4]], i64 [[TMP2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq double* [[TMP4]], [[TMP7]] +// CHECK1-NEXT: [[TMP2:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[DOTADDR]] to double** +// CHECK1-NEXT: [[TMP5:%.*]] = load double*, double** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8** [[DOTADDR1]] to double** +// CHECK1-NEXT: [[TMP7:%.*]] = load double*, double** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr double, double* [[TMP5]], i64 [[TMP3]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq double* [[TMP5]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE2:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi double* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi double* [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP8:%.*]] = load double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP8]], [[TMP9]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi double* [ [[TMP7]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi double* [ [[TMP5]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP9:%.*]] = load double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP9]], [[TMP10]] // CHECK1-NEXT: store double [[ADD]], double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 8 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr double, double* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP7]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq double* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP8]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE2]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done2: // CHECK1-NEXT: ret void @@ -787,7 +789,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@main -// CHECK1-SAME: () #[[ATTR10:[0-9]+]] { +// CHECK1-SAME: () #[[ATTR11:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 @@ -798,7 +800,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg -// CHECK1-SAME: () #[[ATTR11:[0-9]+]] { +// CHECK1-SAME: () #[[ATTR12:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: call void @__tgt_register_requires(i64 1) // CHECK1-NEXT: ret void Index: clang/test/OpenMP/sections_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/sections_reduction_task_codegen.cpp +++ clang/test/OpenMP/sections_reduction_task_codegen.cpp @@ -360,15 +360,16 @@ // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP3]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP3:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: ret void @@ -381,32 +382,33 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[TMP2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP3]], [[TMP5]] +// CHECK1-NEXT: [[TMP2:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[TMP4]], i64 [[TMP3]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP4]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP3]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV3]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done4: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_privates_map. -// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8***, align 8 @@ -459,14 +461,14 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* -// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* -// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR6]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 @@ -487,9 +489,9 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 Index: clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp +++ clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp @@ -43,7 +43,7 @@ // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l14(i32* [[ARGC_ADDR]], i8** [[TMP0]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l14(i32* [[ARGC_ADDR]], i8** [[TMP0]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: ret i32 0 // // @@ -380,15 +380,16 @@ // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP3]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP3:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: ret void @@ -401,32 +402,33 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[TMP2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP3]], [[TMP5]] +// CHECK1-NEXT: [[TMP2:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[TMP4]], i64 [[TMP3]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP4]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP3]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV3]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done4: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_privates_map. -// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8***, align 8 @@ -479,14 +481,14 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* -// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] +// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* -// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR6]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 @@ -507,9 +509,9 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 Index: clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp +++ clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp @@ -43,7 +43,7 @@ // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l14(i32* [[ARGC_ADDR]], i8** [[TMP0]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l14(i32* [[ARGC_ADDR]], i8** [[TMP0]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: ret i32 0 // // @@ -326,15 +326,16 @@ // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP3]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP3:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: ret void @@ -347,32 +348,33 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[TMP2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP3]], [[TMP5]] +// CHECK1-NEXT: [[TMP2:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[TMP4]], i64 [[TMP3]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP4]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP3]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV3]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done4: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_privates_map. -// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8***, align 8 @@ -425,14 +427,14 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* -// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] +// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* -// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR6]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 @@ -453,9 +455,9 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 Index: clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp +++ clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -43,7 +43,7 @@ // CHECK1-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK1-NEXT: store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l14(i32* [[ARGC_ADDR]], i8** [[TMP0]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l14(i32* [[ARGC_ADDR]], i8** [[TMP0]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: ret i32 0 // // @@ -353,15 +353,16 @@ // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP3]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP3:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: ret void @@ -374,25 +375,26 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[TMP2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP3]], [[TMP5]] +// CHECK1-NEXT: [[TMP2:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[TMP4]], i64 [[TMP3]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP4]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP3]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV3]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done4: // CHECK1-NEXT: ret void @@ -726,15 +728,16 @@ // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP3]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP3:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: ret void @@ -747,32 +750,33 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[TMP2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP3]], [[TMP5]] +// CHECK1-NEXT: [[TMP2:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[TMP4]], i64 [[TMP3]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP4]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP3]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV3]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done4: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_privates_map. -// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8***, align 8 @@ -825,14 +829,14 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* -// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] +// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* -// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR6]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 @@ -853,9 +857,9 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 Index: clang/test/OpenMP/taskloop_reduction_codegen.cpp =================================================================== --- clang/test/OpenMP/taskloop_reduction_codegen.cpp +++ clang/test/OpenMP/taskloop_reduction_codegen.cpp @@ -186,12 +186,14 @@ // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64( // CHECK: define internal void @[[RED_FINI2]](i8* noundef %0) -// CHECK: load i64, i64* [[RED_SIZE1]] +// CHECK: [[RED_SIZE1_ADDR:%.+]] = call i64* @llvm.threadlocal.address.p0i64(i64* [[RED_SIZE1]] +// CHECK: load i64, i64* [[RED_SIZE1_ADDR]] // CHECK: call void @ // CHECK: ret void // CHECK: define internal void @[[RED_COMB2]](i8* noundef %0, i8* noundef %1) -// CHECK: load i64, i64* [[RED_SIZE1]] +// CHECK: [[RED_SIZE1_ADDR2:%.+]] = call i64* @llvm.threadlocal.address.p0i64(i64* [[RED_SIZE1]] +// CHECK: load i64, i64* [[RED_SIZE1_ADDR2]] // CHECK: call void [[OMP_COMB1]]( // CHECK: ret void @@ -205,12 +207,14 @@ // CHECK: ret void // CHECK: define internal void @[[RED_INIT4]](i8* noalias noundef %{{.+}}, i8* noalias noundef %{{.+}}) -// CHECK: load i64, i64* [[RED_SIZE2]] +// CHECK: [[RED_SIZE2_ADDR:%.+]] = call i64* @llvm.threadlocal.address.p0i64(i64* [[RED_SIZE2]] +// CHECK: load i64, i64* [[RED_SIZE2_ADDR]] // CHECK: store float 0.000000e+00, float* % // CHECK: ret void // CHECK: define internal void @[[RED_COMB4]](i8* noundef %0, i8* noundef %1) -// CHECK: load i64, i64* [[RED_SIZE2]] +// CHECK: [[RED_SIZE2_ADDR2:%.+]] = call i64* @llvm.threadlocal.address.p0i64(i64* [[RED_SIZE2]] +// CHECK: load i64, i64* [[RED_SIZE2_ADDR2]] // CHECK: fadd float % // CHECK: store float %{{.+}}, float* % // CHECK: ret void Index: clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp =================================================================== --- clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp +++ clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp @@ -103,34 +103,35 @@ // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* @x, align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[X_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[X_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64* -// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64* -// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store i8* null, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [2 x i32]** -// CHECK1-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [2 x i32]** -// CHECK1-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store i8* null, i8** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[X_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i64* +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8** [[TMP5]] to i64* +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store i8* null, i8** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8** [[TMP8]] to [2 x i32]** +// CHECK1-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to [2 x i32]** +// CHECK1-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store i8* null, i8** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 2) -// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.region_id, i32 2, i8** [[TMP12]], i8** [[TMP13]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0) -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.region_id, i32 2, i8** [[TMP13]], i8** [[TMP14]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0) +// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK1-NEXT: br i1 [[TMP16]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64(i64 [[TMP1]], [2 x i32]* [[A]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64(i64 [[TMP2]], [2 x i32]* [[A]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z5tmainIiET_v() @@ -298,7 +299,7 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v -// CHECK1-SAME: () #[[ATTR3:[0-9]+]] comdat { +// CHECK1-SAME: () #[[ATTR4:[0-9]+]] comdat { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[A:%.*]] = alloca [2 x i32], align 4 // CHECK1-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 @@ -306,34 +307,35 @@ // CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8 // CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* @x, align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[X_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[X_CASTED]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64* -// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64* -// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK1-NEXT: store i8* null, i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [2 x i32]** -// CHECK1-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [2 x i32]** -// CHECK1-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK1-NEXT: store i8* null, i8** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[X_CASTED]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i64* +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8** [[TMP5]] to i64* +// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store i8* null, i8** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8** [[TMP8]] to [2 x i32]** +// CHECK1-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to [2 x i32]** +// CHECK1-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store i8* null, i8** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2) -// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l34.region_id, i32 2, i8** [[TMP12]], i8** [[TMP13]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0) -// CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK1-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l34.region_id, i32 2, i8** [[TMP13]], i8** [[TMP14]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0) +// CHECK1-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK1-NEXT: br i1 [[TMP16]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK1: omp_offload.failed: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l34(i64 [[TMP1]], [2 x i32]* [[A]]) #[[ATTR2]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l34(i64 [[TMP2]], [2 x i32]* [[A]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK1: omp_offload.cont: // CHECK1-NEXT: ret i32 0 @@ -500,12 +502,13 @@ // // // CHECK1-LABEL: define {{[^@]+}}@_ZTW1x -// CHECK1-SAME: () #[[ATTR4:[0-9]+]] comdat { -// CHECK1-NEXT: ret i32* @x +// CHECK1-SAME: () #[[ATTR5:[0-9]+]] comdat { +// CHECK1-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK1-NEXT: ret i32* [[TMP1]] // // // CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg -// CHECK1-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: () #[[ATTR6:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: call void @__tgt_register_requires(i64 1) // CHECK1-NEXT: ret void @@ -522,33 +525,34 @@ // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: store i32 0, i32* [[RETVAL]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* @x, align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[X_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[X_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32* -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32* -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store i8* null, i8** [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [2 x i32]** -// CHECK3-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [2 x i32]** -// CHECK3-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store i8* null, i8** [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[X_CASTED]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[X_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32* +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8** [[TMP5]] to i32* +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store i8* null, i8** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i8** [[TMP8]] to [2 x i32]** +// CHECK3-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to [2 x i32]** +// CHECK3-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store i8* null, i8** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3:[0-9]+]], i64 -1, i64 2) -// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.region_id, i32 2, i8** [[TMP12]], i8** [[TMP13]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0) -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64.region_id, i32 2, i8** [[TMP13]], i8** [[TMP14]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0) +// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK3-NEXT: br i1 [[TMP16]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64(i32 [[TMP1]], [2 x i32]* [[A]]) #[[ATTR2:[0-9]+]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64(i32 [[TMP2]], [2 x i32]* [[A]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() @@ -710,7 +714,7 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_Z5tmainIiET_v -// CHECK3-SAME: () #[[ATTR3:[0-9]+]] comdat { +// CHECK3-SAME: () #[[ATTR4:[0-9]+]] comdat { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[A:%.*]] = alloca [2 x i32], align 4 // CHECK3-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 @@ -718,33 +722,34 @@ // CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4 // CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* @x, align 4 -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[X_CASTED]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[X_CASTED]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32* -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32* -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK3-NEXT: store i8* null, i8** [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [2 x i32]** -// CHECK3-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [2 x i32]** -// CHECK3-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK3-NEXT: store i8* null, i8** [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[X_CASTED]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[X_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32* +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8** [[TMP5]] to i32* +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-NEXT: store i8* null, i8** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i8** [[TMP8]] to [2 x i32]** +// CHECK3-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to [2 x i32]** +// CHECK3-NEXT: store [2 x i32]* [[A]], [2 x i32]** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-NEXT: store i8* null, i8** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: call void @__kmpc_push_target_tripcount_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i64 2) -// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l34.region_id, i32 2, i8** [[TMP12]], i8** [[TMP13]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0) -// CHECK3-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 -// CHECK3-NEXT: br i1 [[TMP15]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB3]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l34.region_id, i32 2, i8** [[TMP13]], i8** [[TMP14]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.4, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.5, i32 0, i32 0), i8** null, i8** null, i32 0, i32 0) +// CHECK3-NEXT: [[TMP16:%.*]] = icmp ne i32 [[TMP15]], 0 +// CHECK3-NEXT: br i1 [[TMP16]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK3: omp_offload.failed: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l34(i32 [[TMP1]], [2 x i32]* [[A]]) #[[ATTR2]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l34(i32 [[TMP2]], [2 x i32]* [[A]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK3: omp_offload.cont: // CHECK3-NEXT: ret i32 0 @@ -905,12 +910,13 @@ // // // CHECK3-LABEL: define {{[^@]+}}@_ZTW1x -// CHECK3-SAME: () #[[ATTR4:[0-9]+]] comdat { -// CHECK3-NEXT: ret i32* @x +// CHECK3-SAME: () #[[ATTR5:[0-9]+]] comdat { +// CHECK3-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK3-NEXT: ret i32* [[TMP1]] // // // CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg -// CHECK3-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: () #[[ATTR6:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: call void @__tgt_register_requires(i64 1) // CHECK3-NEXT: ret void @@ -1098,12 +1104,13 @@ // // // CHECK9-LABEL: define {{[^@]+}}@_ZTW1x -// CHECK9-SAME: () #[[ATTR4:[0-9]+]] comdat { -// CHECK9-NEXT: ret i32* @x +// CHECK9-SAME: () #[[ATTR5:[0-9]+]] comdat { +// CHECK9-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @x) +// CHECK9-NEXT: ret i32* [[TMP1]] // // // CHECK9-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg -// CHECK9-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK9-SAME: () #[[ATTR6:[0-9]+]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: call void @__tgt_register_requires(i64 1) // CHECK9-NEXT: ret void Index: clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp =================================================================== --- clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp +++ clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -49,7 +49,7 @@ // CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8 -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l14(i64 [[TMP1]], i8** [[TMP2]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l14(i64 [[TMP1]], i8** [[TMP2]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: ret i32 0 // // @@ -359,15 +359,16 @@ // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP3]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP3:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: ret void @@ -380,25 +381,26 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[TMP2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP3]], [[TMP5]] +// CHECK1-NEXT: [[TMP2:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[TMP4]], i64 [[TMP3]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP4]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP3]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV3]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done4: // CHECK1-NEXT: ret void @@ -732,15 +734,16 @@ // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP3]] -// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP4]] +// CHECK1-NEXT: [[TMP3:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP2]], i64 [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYINIT_ISEMPTY:%.*]] = icmp eq i8* [[TMP2]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYINIT_ISEMPTY]], label [[OMP_ARRAYINIT_DONE:%.*]], label [[OMP_ARRAYINIT_BODY:%.*]] // CHECK1: omp.arrayinit.body: // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYINIT_BODY]] ] // CHECK1-NEXT: store i8 0, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP4]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYINIT_DONE]], label [[OMP_ARRAYINIT_BODY]] // CHECK1: omp.arrayinit.done: // CHECK1-NEXT: ret void @@ -753,32 +756,33 @@ // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 // CHECK1-NEXT: store i8* [[TMP1]], i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* @{{reduction_size[.].+[.]}}, align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr i8, i8* [[TMP3]], i64 [[TMP2]] -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP3]], [[TMP5]] +// CHECK1-NEXT: [[TMP2:%.*]] = call i64* @llvm.threadlocal.address.p0i64(i64* @{{reduction_size[.].+[.]}}) +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR1]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr i8, i8* [[TMP4]], i64 [[TMP3]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq i8* [[TMP4]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP3]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] -// CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP6]] to i32 -// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi i8* [ [[TMP5]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi i8* [ [[TMP4]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[TMP7:%.*]] = load i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], align 1 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i8 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], [[CONV2]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i8 // CHECK1-NEXT: store i8 [[CONV3]], i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], align 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr i8, i8* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq i8* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP6]] // CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] // CHECK1: omp.arraycpy.done4: // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_task_privates_map. -// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK1-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], i8*** noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i8***, align 8 @@ -831,14 +835,14 @@ // CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* -// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] +// CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* -// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR6]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP24:%.*]] = load i8**, i8*** [[TMP23]], align 8 @@ -859,9 +863,9 @@ // CHECK1-NEXT: [[TMP36:%.*]] = sdiv exact i64 [[TMP35]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP37:%.*]] = add nuw i64 [[TMP36]], 1 // CHECK1-NEXT: [[TMP38:%.*]] = mul nuw i64 [[TMP37]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) -// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8 +// CHECK1-NEXT: store i64 [[TMP37]], i64* @{{reduction_size[.].+[.]}}, align 8, !noalias !12 // CHECK1-NEXT: [[TMP39:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP40:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP39]], i8* [[TMP25]]) #[[ATTR6]] // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 2 // CHECK1-NEXT: [[TMP42:%.*]] = load i8**, i8*** [[TMP41]], align 8 // CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 Index: clang/test/OpenMP/threadprivate_codegen.cpp =================================================================== --- clang/test/OpenMP/threadprivate_codegen.cpp +++ clang/test/OpenMP/threadprivate_codegen.cpp @@ -3662,57 +3662,62 @@ // CHECK-TLS1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP4]], i32 0, i32 0 // CHECK-TLS1-NEXT: [[TMP5:%.*]] = load i32, i32* [[A1]], align 4 // CHECK-TLS1-NEXT: store i32 [[TMP5]], i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_SMAIN:%.*]], %struct.Smain* @_ZZ4mainE2sm, i32 0, i32 0), align 8 -// CHECK-TLS1-NEXT: [[TMP7:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP6]] +// CHECK-TLS1-NEXT: [[TMP6:%.*]] = call %struct.Smain* @llvm.threadlocal.address.p0s_struct.Smains(%struct.Smain* @_ZZ4mainE2sm) +// CHECK-TLS1-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[TMP6]], i32 0, i32 0 +// CHECK-TLS1-NEXT: [[TMP7:%.*]] = load i32, i32* [[A2]], align 8 +// CHECK-TLS1-NEXT: [[TMP8:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP7]] // CHECK-TLS1-NEXT: store i32 [[ADD]], i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[TMP8:%.*]] = call %struct.S1* @_ZTWL3gs1() -// CHECK-TLS1-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP8]], i32 0, i32 0 -// CHECK-TLS1-NEXT: [[TMP9:%.*]] = load i32, i32* [[A2]], align 4 -// CHECK-TLS1-NEXT: [[TMP10:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK-TLS1-NEXT: store i32 [[ADD3]], i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8 -// CHECK-TLS1-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK-TLS1-NEXT: [[TMP9:%.*]] = call %struct.S1* @_ZTWL3gs1() +// CHECK-TLS1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP9]], i32 0, i32 0 +// CHECK-TLS1-NEXT: [[TMP10:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK-TLS1-NEXT: [[TMP11:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP10]] // CHECK-TLS1-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[TMP13:%.*]] = call %struct.S5* @_ZTW3gs3() -// CHECK-TLS1-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP13]], i32 0, i32 0 -// CHECK-TLS1-NEXT: [[TMP14:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK-TLS1-NEXT: [[TMP15:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], [[TMP14]] -// CHECK-TLS1-NEXT: store i32 [[ADD6]], i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[TMP16:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x() -// CHECK-TLS1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP16]], i64 0, i64 1 -// CHECK-TLS1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1 -// CHECK-TLS1-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX7]], i32 0, i32 0 -// CHECK-TLS1-NEXT: [[TMP17:%.*]] = load i32, i32* [[A8]], align 4 -// CHECK-TLS1-NEXT: [[TMP18:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK-TLS1-NEXT: store i32 [[ADD9]], i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[TMP19:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4 -// CHECK-TLS1-NEXT: [[TMP20:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP19]] +// CHECK-TLS1-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8 +// CHECK-TLS1-NEXT: [[TMP13:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] +// CHECK-TLS1-NEXT: store i32 [[ADD5]], i32* [[RES]], align 4 +// CHECK-TLS1-NEXT: [[TMP14:%.*]] = call %struct.S5* @_ZTW3gs3() +// CHECK-TLS1-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP14]], i32 0, i32 0 +// CHECK-TLS1-NEXT: [[TMP15:%.*]] = load i32, i32* [[A6]], align 4 +// CHECK-TLS1-NEXT: [[TMP16:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] +// CHECK-TLS1-NEXT: store i32 [[ADD7]], i32* [[RES]], align 4 +// CHECK-TLS1-NEXT: [[TMP17:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x() +// CHECK-TLS1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP17]], i64 0, i64 1 +// CHECK-TLS1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1 +// CHECK-TLS1-NEXT: [[A9:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX8]], i32 0, i32 0 +// CHECK-TLS1-NEXT: [[TMP18:%.*]] = load i32, i32* [[A9]], align 4 +// CHECK-TLS1-NEXT: [[TMP19:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK-TLS1-NEXT: store i32 [[ADD10]], i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[TMP21:%.*]] = load float, float* @_ZN2STIfE2stE, align 4 -// CHECK-TLS1-NEXT: [[CONV:%.*]] = fptosi float [[TMP21]] to i32 +// CHECK-TLS1-NEXT: [[TMP20:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN2STIiE2stE) +// CHECK-TLS1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 // CHECK-TLS1-NEXT: [[TMP22:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[CONV]] +// CHECK-TLS1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP21]] // CHECK-TLS1-NEXT: store i32 [[ADD11]], i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[TMP23:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE() -// CHECK-TLS1-NEXT: [[A12:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP23]], i32 0, i32 0 -// CHECK-TLS1-NEXT: [[TMP24:%.*]] = load i32, i32* [[A12]], align 4 +// CHECK-TLS1-NEXT: [[TMP23:%.*]] = call float* @llvm.threadlocal.address.p0f32(float* @_ZN2STIfE2stE) +// CHECK-TLS1-NEXT: [[TMP24:%.*]] = load float, float* [[TMP23]], align 4 +// CHECK-TLS1-NEXT: [[CONV:%.*]] = fptosi float [[TMP24]] to i32 // CHECK-TLS1-NEXT: [[TMP25:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP25]], [[TMP24]] -// CHECK-TLS1-NEXT: store i32 [[ADD13]], i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[TMP26:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: ret i32 [[TMP26]] +// CHECK-TLS1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP25]], [[CONV]] +// CHECK-TLS1-NEXT: store i32 [[ADD12]], i32* [[RES]], align 4 +// CHECK-TLS1-NEXT: [[TMP26:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE() +// CHECK-TLS1-NEXT: [[A13:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP26]], i32 0, i32 0 +// CHECK-TLS1-NEXT: [[TMP27:%.*]] = load i32, i32* [[A13]], align 4 +// CHECK-TLS1-NEXT: [[TMP28:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP28]], [[TMP27]] +// CHECK-TLS1-NEXT: store i32 [[ADD14]], i32* [[RES]], align 4 +// CHECK-TLS1-NEXT: [[TMP29:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS1-NEXT: ret i32 [[TMP29]] // // // CHECK-TLS1-LABEL: define {{[^@]+}}@_ZTWL3gs1 // CHECK-TLS1-SAME: () #[[ATTR5:[0-9]+]] { // CHECK-TLS1-NEXT: call void @_ZTHL3gs1() -// CHECK-TLS1-NEXT: ret %struct.S1* @_ZL3gs1 +// CHECK-TLS1-NEXT: [[TMP1:%.*]] = call %struct.S1* @llvm.threadlocal.address.p0s_struct.S1s(%struct.S1* @_ZL3gs1) +// CHECK-TLS1-NEXT: ret %struct.S1* [[TMP1]] // // // CHECK-TLS1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei @@ -3745,7 +3750,8 @@ // CHECK-TLS1-NEXT: call void @_ZTHN6Static1sE() // CHECK-TLS1-NEXT: br label [[TMP2]] // CHECK-TLS1: 2: -// CHECK-TLS1-NEXT: ret %struct.S3* @_ZN6Static1sE +// CHECK-TLS1-NEXT: [[TMP3:%.*]] = call %struct.S3* @llvm.threadlocal.address.p0s_struct.S3s(%struct.S3* @_ZN6Static1sE) +// CHECK-TLS1-NEXT: ret %struct.S3* [[TMP3]] // // // CHECK-TLS1-LABEL: define {{[^@]+}}@_ZTW3gs3 @@ -3755,19 +3761,22 @@ // CHECK-TLS1-NEXT: call void @_ZTH3gs3() // CHECK-TLS1-NEXT: br label [[TMP2]] // CHECK-TLS1: 2: -// CHECK-TLS1-NEXT: ret %struct.S5* @gs3 +// CHECK-TLS1-NEXT: [[TMP3:%.*]] = call %struct.S5* @llvm.threadlocal.address.p0s_struct.S5s(%struct.S5* @gs3) +// CHECK-TLS1-NEXT: ret %struct.S5* [[TMP3]] // // // CHECK-TLS1-LABEL: define {{[^@]+}}@_ZTW5arr_x // CHECK-TLS1-SAME: () #[[ATTR5]] comdat { // CHECK-TLS1-NEXT: call void @_ZTH5arr_x() -// CHECK-TLS1-NEXT: ret [2 x [3 x %struct.S1]]* @arr_x +// CHECK-TLS1-NEXT: [[TMP1:%.*]] = call [2 x [3 x %struct.S1]]* @llvm.threadlocal.address.p0a2a3s_struct.S1s([2 x [3 x %struct.S1]]* @arr_x) +// CHECK-TLS1-NEXT: ret [2 x [3 x %struct.S1]]* [[TMP1]] // // // CHECK-TLS1-LABEL: define {{[^@]+}}@_ZTWN2STI2S4E2stE // CHECK-TLS1-SAME: () #[[ATTR5]] comdat { // CHECK-TLS1-NEXT: call void @_ZTHN2STI2S4E2stE() -// CHECK-TLS1-NEXT: ret %struct.S4* @_ZN2STI2S4E2stE +// CHECK-TLS1-NEXT: [[TMP1:%.*]] = call %struct.S4* @llvm.threadlocal.address.p0s_struct.S4s(%struct.S4* @_ZN2STI2S4E2stE) +// CHECK-TLS1-NEXT: ret %struct.S4* [[TMP1]] // // // CHECK-TLS1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei @@ -3796,7 +3805,7 @@ // // // CHECK-TLS1-LABEL: define {{[^@]+}}@_Z6foobarv -// CHECK-TLS1-SAME: () #[[ATTR6:[0-9]+]] { +// CHECK-TLS1-SAME: () #[[ATTR7:[0-9]+]] { // CHECK-TLS1-NEXT: entry: // CHECK-TLS1-NEXT: [[RES:%.*]] = alloca i32, align 4 // CHECK-TLS1-NEXT: [[TMP0:%.*]] = call %struct.S3* @_ZTWN6Static1sE() @@ -3827,23 +3836,25 @@ // CHECK-TLS1-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4 // CHECK-TLS1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] // CHECK-TLS1-NEXT: store i32 [[ADD7]], i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[TMP13:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4 -// CHECK-TLS1-NEXT: [[TMP14:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP14]], [[TMP13]] +// CHECK-TLS1-NEXT: [[TMP13:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN2STIiE2stE) +// CHECK-TLS1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK-TLS1-NEXT: [[TMP15:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], [[TMP14]] // CHECK-TLS1-NEXT: store i32 [[ADD8]], i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[TMP15:%.*]] = load float, float* @_ZN2STIfE2stE, align 4 -// CHECK-TLS1-NEXT: [[CONV:%.*]] = fptosi float [[TMP15]] to i32 -// CHECK-TLS1-NEXT: [[TMP16:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], [[CONV]] +// CHECK-TLS1-NEXT: [[TMP16:%.*]] = call float* @llvm.threadlocal.address.p0f32(float* @_ZN2STIfE2stE) +// CHECK-TLS1-NEXT: [[TMP17:%.*]] = load float, float* [[TMP16]], align 4 +// CHECK-TLS1-NEXT: [[CONV:%.*]] = fptosi float [[TMP17]] to i32 +// CHECK-TLS1-NEXT: [[TMP18:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[CONV]] // CHECK-TLS1-NEXT: store i32 [[ADD9]], i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[TMP17:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE() -// CHECK-TLS1-NEXT: [[A10:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP17]], i32 0, i32 0 -// CHECK-TLS1-NEXT: [[TMP18:%.*]] = load i32, i32* [[A10]], align 4 -// CHECK-TLS1-NEXT: [[TMP19:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] +// CHECK-TLS1-NEXT: [[TMP19:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE() +// CHECK-TLS1-NEXT: [[A10:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP19]], i32 0, i32 0 +// CHECK-TLS1-NEXT: [[TMP20:%.*]] = load i32, i32* [[A10]], align 4 +// CHECK-TLS1-NEXT: [[TMP21:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] // CHECK-TLS1-NEXT: store i32 [[ADD11]], i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: [[TMP20:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS1-NEXT: ret i32 [[TMP20]] +// CHECK-TLS1-NEXT: [[TMP22:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS1-NEXT: ret i32 [[TMP22]] // // // CHECK-TLS1-LABEL: define {{[^@]+}}@__cxx_global_var_init.3 @@ -3953,59 +3964,62 @@ // CHECK-TLS2-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP4]], i32 0, i32 0 // CHECK-TLS2-NEXT: [[TMP5:%.*]] = load i32, i32* [[A1]], align 4 // CHECK-TLS2-NEXT: store i32 [[TMP5]], i32* [[RES]], align 4 -// CHECK-TLS2-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_SMAIN:%.*]], %struct.Smain* @_ZZ4mainE2sm, i32 0, i32 0), align 8 -// CHECK-TLS2-NEXT: [[TMP7:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP6]] +// CHECK-TLS2-NEXT: [[TMP6:%.*]] = call %struct.Smain* @llvm.threadlocal.address.p0s_struct.Smains(%struct.Smain* @_ZZ4mainE2sm) +// CHECK-TLS2-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[TMP6]], i32 0, i32 0 +// CHECK-TLS2-NEXT: [[TMP7:%.*]] = load i32, i32* [[A2]], align 8 +// CHECK-TLS2-NEXT: [[TMP8:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP7]] // CHECK-TLS2-NEXT: store i32 [[ADD]], i32* [[RES]], align 4 -// CHECK-TLS2-NEXT: [[TMP8:%.*]] = call %struct.S1* @_ZTWL3gs1() -// CHECK-TLS2-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP8]], i32 0, i32 0 -// CHECK-TLS2-NEXT: [[TMP9:%.*]] = load i32, i32* [[A2]], align 4 -// CHECK-TLS2-NEXT: [[TMP10:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]] -// CHECK-TLS2-NEXT: store i32 [[ADD3]], i32* [[RES]], align 4 -// CHECK-TLS2-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8 -// CHECK-TLS2-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] +// CHECK-TLS2-NEXT: [[TMP9:%.*]] = call %struct.S1* @_ZTWL3gs1() +// CHECK-TLS2-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP9]], i32 0, i32 0 +// CHECK-TLS2-NEXT: [[TMP10:%.*]] = load i32, i32* [[A3]], align 4 +// CHECK-TLS2-NEXT: [[TMP11:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP10]] // CHECK-TLS2-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4 -// CHECK-TLS2-NEXT: [[TMP13:%.*]] = call %struct.S5* @_ZTW3gs3() -// CHECK-TLS2-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP13]], i32 0, i32 0 -// CHECK-TLS2-NEXT: [[TMP14:%.*]] = load i32, i32* [[A5]], align 4 -// CHECK-TLS2-NEXT: [[TMP15:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], [[TMP14]] -// CHECK-TLS2-NEXT: store i32 [[ADD6]], i32* [[RES]], align 4 -// CHECK-TLS2-NEXT: [[TMP16:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x() -// CHECK-TLS2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP16]], i64 0, i64 1 -// CHECK-TLS2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1 -// CHECK-TLS2-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX7]], i32 0, i32 0 -// CHECK-TLS2-NEXT: [[TMP17:%.*]] = load i32, i32* [[A8]], align 4 -// CHECK-TLS2-NEXT: [[TMP18:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP17]] -// CHECK-TLS2-NEXT: store i32 [[ADD9]], i32* [[RES]], align 4 -// CHECK-TLS2-NEXT: [[TMP19:%.*]] = call i32* @_ZTWN2STIiE2stE() -// CHECK-TLS2-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK-TLS2-NEXT: [[TMP21:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]] +// CHECK-TLS2-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8 +// CHECK-TLS2-NEXT: [[TMP13:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] +// CHECK-TLS2-NEXT: store i32 [[ADD5]], i32* [[RES]], align 4 +// CHECK-TLS2-NEXT: [[TMP14:%.*]] = call %struct.S5* @_ZTW3gs3() +// CHECK-TLS2-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP14]], i32 0, i32 0 +// CHECK-TLS2-NEXT: [[TMP15:%.*]] = load i32, i32* [[A6]], align 4 +// CHECK-TLS2-NEXT: [[TMP16:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] +// CHECK-TLS2-NEXT: store i32 [[ADD7]], i32* [[RES]], align 4 +// CHECK-TLS2-NEXT: [[TMP17:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x() +// CHECK-TLS2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP17]], i64 0, i64 1 +// CHECK-TLS2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1 +// CHECK-TLS2-NEXT: [[A9:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX8]], i32 0, i32 0 +// CHECK-TLS2-NEXT: [[TMP18:%.*]] = load i32, i32* [[A9]], align 4 +// CHECK-TLS2-NEXT: [[TMP19:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], [[TMP18]] // CHECK-TLS2-NEXT: store i32 [[ADD10]], i32* [[RES]], align 4 -// CHECK-TLS2-NEXT: [[TMP22:%.*]] = call float* @_ZTWN2STIfE2stE() -// CHECK-TLS2-NEXT: [[TMP23:%.*]] = load float, float* [[TMP22]], align 4 -// CHECK-TLS2-NEXT: [[CONV:%.*]] = fptosi float [[TMP23]] to i32 -// CHECK-TLS2-NEXT: [[TMP24:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP24]], [[CONV]] +// CHECK-TLS2-NEXT: [[TMP20:%.*]] = call i32* @_ZTWN2STIiE2stE() +// CHECK-TLS2-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 +// CHECK-TLS2-NEXT: [[TMP22:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP21]] // CHECK-TLS2-NEXT: store i32 [[ADD11]], i32* [[RES]], align 4 -// CHECK-TLS2-NEXT: [[TMP25:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE() -// CHECK-TLS2-NEXT: [[A12:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP25]], i32 0, i32 0 -// CHECK-TLS2-NEXT: [[TMP26:%.*]] = load i32, i32* [[A12]], align 4 -// CHECK-TLS2-NEXT: [[TMP27:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP27]], [[TMP26]] -// CHECK-TLS2-NEXT: store i32 [[ADD13]], i32* [[RES]], align 4 +// CHECK-TLS2-NEXT: [[TMP23:%.*]] = call float* @_ZTWN2STIfE2stE() +// CHECK-TLS2-NEXT: [[TMP24:%.*]] = load float, float* [[TMP23]], align 4 +// CHECK-TLS2-NEXT: [[CONV:%.*]] = fptosi float [[TMP24]] to i32 +// CHECK-TLS2-NEXT: [[TMP25:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP25]], [[CONV]] +// CHECK-TLS2-NEXT: store i32 [[ADD12]], i32* [[RES]], align 4 +// CHECK-TLS2-NEXT: [[TMP26:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE() +// CHECK-TLS2-NEXT: [[A13:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP26]], i32 0, i32 0 +// CHECK-TLS2-NEXT: [[TMP27:%.*]] = load i32, i32* [[A13]], align 4 // CHECK-TLS2-NEXT: [[TMP28:%.*]] = load i32, i32* [[RES]], align 4 -// CHECK-TLS2-NEXT: ret i32 [[TMP28]] +// CHECK-TLS2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP28]], [[TMP27]] +// CHECK-TLS2-NEXT: store i32 [[ADD14]], i32* [[RES]], align 4 +// CHECK-TLS2-NEXT: [[TMP29:%.*]] = load i32, i32* [[RES]], align 4 +// CHECK-TLS2-NEXT: ret i32 [[TMP29]] // // // CHECK-TLS2-LABEL: define {{[^@]+}}@_ZTWL3gs1 // CHECK-TLS2-SAME: () #[[ATTR1:[0-9]+]] { // CHECK-TLS2-NEXT: call void @_ZTHL3gs1() -// CHECK-TLS2-NEXT: ret %struct.S1* @_ZL3gs1 +// CHECK-TLS2-NEXT: [[TMP1:%.*]] = call %struct.S1* @llvm.threadlocal.address.p0s_struct.S1s(%struct.S1* @_ZL3gs1) +// CHECK-TLS2-NEXT: ret %struct.S1* [[TMP1]] // // // CHECK-TLS2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei @@ -4038,7 +4052,8 @@ // CHECK-TLS2-NEXT: call void @_ZTHN6Static1sE() // CHECK-TLS2-NEXT: br label [[TMP2]] // CHECK-TLS2: 2: -// CHECK-TLS2-NEXT: ret %struct.S3* @_ZN6Static1sE +// CHECK-TLS2-NEXT: [[TMP3:%.*]] = call %struct.S3* @llvm.threadlocal.address.p0s_struct.S3s(%struct.S3* @_ZN6Static1sE) +// CHECK-TLS2-NEXT: ret %struct.S3* [[TMP3]] // // // CHECK-TLS2-LABEL: define {{[^@]+}}@_ZTW3gs3 @@ -4048,33 +4063,38 @@ // CHECK-TLS2-NEXT: call void @_ZTH3gs3() // CHECK-TLS2-NEXT: br label [[TMP2]] // CHECK-TLS2: 2: -// CHECK-TLS2-NEXT: ret %struct.S5* @gs3 +// CHECK-TLS2-NEXT: [[TMP3:%.*]] = call %struct.S5* @llvm.threadlocal.address.p0s_struct.S5s(%struct.S5* @gs3) +// CHECK-TLS2-NEXT: ret %struct.S5* [[TMP3]] // // // CHECK-TLS2-LABEL: define {{[^@]+}}@_ZTW5arr_x // CHECK-TLS2-SAME: () #[[ATTR1]] comdat { // CHECK-TLS2-NEXT: call void @_ZTH5arr_x() -// CHECK-TLS2-NEXT: ret [2 x [3 x %struct.S1]]* @arr_x +// CHECK-TLS2-NEXT: [[TMP1:%.*]] = call [2 x [3 x %struct.S1]]* @llvm.threadlocal.address.p0a2a3s_struct.S1s([2 x [3 x %struct.S1]]* @arr_x) +// CHECK-TLS2-NEXT: ret [2 x [3 x %struct.S1]]* [[TMP1]] // // // CHECK-TLS2-LABEL: define {{[^@]+}}@_ZTWN2STIiE2stE // CHECK-TLS2-SAME: () #[[ATTR1]] comdat { -// CHECK-TLS2-NEXT: ret i32* @_ZN2STIiE2stE +// CHECK-TLS2-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN2STIiE2stE) +// CHECK-TLS2-NEXT: ret i32* [[TMP1]] // // // CHECK-TLS2-LABEL: define {{[^@]+}}@_ZTWN2STIfE2stE // CHECK-TLS2-SAME: () #[[ATTR1]] comdat { -// CHECK-TLS2-NEXT: ret float* @_ZN2STIfE2stE +// CHECK-TLS2-NEXT: [[TMP1:%.*]] = call float* @llvm.threadlocal.address.p0f32(float* @_ZN2STIfE2stE) +// CHECK-TLS2-NEXT: ret float* [[TMP1]] // // // CHECK-TLS2-LABEL: define {{[^@]+}}@_ZTWN2STI2S4E2stE // CHECK-TLS2-SAME: () #[[ATTR1]] comdat { // CHECK-TLS2-NEXT: call void @_ZTHN2STI2S4E2stE() -// CHECK-TLS2-NEXT: ret %struct.S4* @_ZN2STI2S4E2stE +// CHECK-TLS2-NEXT: [[TMP1:%.*]] = call %struct.S4* @llvm.threadlocal.address.p0s_struct.S4s(%struct.S4* @_ZN2STI2S4E2stE) +// CHECK-TLS2-NEXT: ret %struct.S4* [[TMP1]] // // // CHECK-TLS2-LABEL: define {{[^@]+}}@_Z6foobarv -// CHECK-TLS2-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK-TLS2-SAME: () #[[ATTR6:[0-9]+]] { // CHECK-TLS2-NEXT: entry: // CHECK-TLS2-NEXT: [[RES:%.*]] = alloca i32, align 4 // CHECK-TLS2-NEXT: [[TMP0:%.*]] = call %struct.S3* @_ZTWN6Static1sE() @@ -4127,7 +4147,7 @@ // // // CHECK-TLS2-LABEL: define {{[^@]+}}@__cxx_global_var_init -// CHECK-TLS2-SAME: () #[[ATTR6:[0-9]+]] { +// CHECK-TLS2-SAME: () #[[ATTR7:[0-9]+]] { // CHECK-TLS2-NEXT: entry: // CHECK-TLS2-NEXT: call void @_ZN2S1C1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(4) @_ZL3gs1, i32 noundef 5) // CHECK-TLS2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR4]] @@ -4183,7 +4203,7 @@ // // // CHECK-TLS2-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 -// CHECK-TLS2-SAME: () #[[ATTR6]] { +// CHECK-TLS2-SAME: () #[[ATTR7]] { // CHECK-TLS2-NEXT: entry: // CHECK-TLS2-NEXT: call void @_ZN2S2C1Ei(%struct.S2* noundef nonnull align 8 dereferenceable(16) @_ZL3gs2, i32 noundef 27) // CHECK-TLS2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR4]] @@ -4239,7 +4259,7 @@ // // // CHECK-TLS2-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 -// CHECK-TLS2-SAME: () #[[ATTR6]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +// CHECK-TLS2-SAME: () #[[ATTR7]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { // CHECK-TLS2-NEXT: entry: // CHECK-TLS2-NEXT: [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8 // CHECK-TLS2-NEXT: [[ARRAYINIT_ENDOFINIT1:%.*]] = alloca %struct.S1*, align 8 @@ -4332,7 +4352,7 @@ // // // CHECK-TLS2-LABEL: define {{[^@]+}}@__cxx_global_array_dtor -// CHECK-TLS2-SAME: (i8* noundef [[TMP0:%.*]]) #[[ATTR6]] { +// CHECK-TLS2-SAME: (i8* noundef [[TMP0:%.*]]) #[[ATTR7]] { // CHECK-TLS2-NEXT: entry: // CHECK-TLS2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK-TLS2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 @@ -4373,7 +4393,7 @@ // // // CHECK-TLS2-LABEL: define {{[^@]+}}@__cxx_global_var_init.3 -// CHECK-TLS2-SAME: () #[[ATTR6]] { +// CHECK-TLS2-SAME: () #[[ATTR7]] { // CHECK-TLS2-NEXT: entry: // CHECK-TLS2-NEXT: [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8 // CHECK-TLS2-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 @@ -4436,14 +4456,14 @@ // // // CHECK-TLS2-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_threadprivate_codegen.cpp -// CHECK-TLS2-SAME: () #[[ATTR6]] { +// CHECK-TLS2-SAME: () #[[ATTR7]] { // CHECK-TLS2-NEXT: entry: // CHECK-TLS2-NEXT: call void @__cxx_global_var_init.1() // CHECK-TLS2-NEXT: ret void // // // CHECK-TLS2-LABEL: define {{[^@]+}}@__tls_init -// CHECK-TLS2-SAME: () #[[ATTR6]] { +// CHECK-TLS2-SAME: () #[[ATTR7]] { // CHECK-TLS2-NEXT: entry: // CHECK-TLS2-NEXT: [[TMP0:%.*]] = load i8, i8* @__tls_guard, align 1 // CHECK-TLS2-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0 @@ -4714,83 +4734,88 @@ // CHECK-TLS3-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP4]], i32 0, i32 0, !dbg [[DBG212:![0-9]+]] // CHECK-TLS3-NEXT: [[TMP5:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG212]] // CHECK-TLS3-NEXT: store i32 [[TMP5]], i32* [[RES]], align 4, !dbg [[DBG213:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_SMAIN:%.*]], %struct.Smain* @_ZZ4mainE2sm, i32 0, i32 0), align 8, !dbg [[DBG214:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP7:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG215:![0-9]+]] -// CHECK-TLS3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP6]], !dbg [[DBG215]] -// CHECK-TLS3-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG215]] -// CHECK-TLS3-NEXT: [[TMP8:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG216:![0-9]+]] -// CHECK-TLS3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP8]], i32 0, i32 0, !dbg [[DBG217:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A2]], align 4, !dbg [[DBG217]] -// CHECK-TLS3-NEXT: [[TMP10:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG218:![0-9]+]] -// CHECK-TLS3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]], !dbg [[DBG218]] -// CHECK-TLS3-NEXT: store i32 [[ADD3]], i32* [[RES]], align 4, !dbg [[DBG218]] -// CHECK-TLS3-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG219:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG220:![0-9]+]] -// CHECK-TLS3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG220]] -// CHECK-TLS3-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG220]] -// CHECK-TLS3-NEXT: [[TMP13:%.*]] = call %struct.S5* @_ZTW3gs3(), !dbg [[DBG221:![0-9]+]] -// CHECK-TLS3-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP13]], i32 0, i32 0, !dbg [[DBG222:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP14:%.*]] = load i32, i32* [[A5]], align 4, !dbg [[DBG222]] -// CHECK-TLS3-NEXT: [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG223:![0-9]+]] -// CHECK-TLS3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], [[TMP14]], !dbg [[DBG223]] -// CHECK-TLS3-NEXT: store i32 [[ADD6]], i32* [[RES]], align 4, !dbg [[DBG223]] -// CHECK-TLS3-NEXT: [[TMP16:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x(), !dbg [[DBG224:![0-9]+]] -// CHECK-TLS3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP16]], i64 0, i64 1, !dbg [[DBG224]] -// CHECK-TLS3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG224]] -// CHECK-TLS3-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX7]], i32 0, i32 0, !dbg [[DBG225:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP17:%.*]] = load i32, i32* [[A8]], align 4, !dbg [[DBG225]] -// CHECK-TLS3-NEXT: [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG226:![0-9]+]] -// CHECK-TLS3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP17]], !dbg [[DBG226]] -// CHECK-TLS3-NEXT: store i32 [[ADD9]], i32* [[RES]], align 4, !dbg [[DBG226]] -// CHECK-TLS3-NEXT: [[TMP19:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4, !dbg [[DBG227:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP20:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG228:![0-9]+]] -// CHECK-TLS3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], [[TMP19]], !dbg [[DBG228]] -// CHECK-TLS3-NEXT: store i32 [[ADD10]], i32* [[RES]], align 4, !dbg [[DBG228]] -// CHECK-TLS3-NEXT: [[TMP21:%.*]] = load float, float* @_ZN2STIfE2stE, align 4, !dbg [[DBG229:![0-9]+]] -// CHECK-TLS3-NEXT: [[CONV:%.*]] = fptosi float [[TMP21]] to i32, !dbg [[DBG229]] -// CHECK-TLS3-NEXT: [[TMP22:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG230:![0-9]+]] -// CHECK-TLS3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[CONV]], !dbg [[DBG230]] -// CHECK-TLS3-NEXT: store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG230]] -// CHECK-TLS3-NEXT: [[TMP23:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE(), !dbg [[DBG231:![0-9]+]] -// CHECK-TLS3-NEXT: [[A12:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP23]], i32 0, i32 0, !dbg [[DBG232:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP24:%.*]] = load i32, i32* [[A12]], align 4, !dbg [[DBG232]] -// CHECK-TLS3-NEXT: [[TMP25:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG233:![0-9]+]] -// CHECK-TLS3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP25]], [[TMP24]], !dbg [[DBG233]] -// CHECK-TLS3-NEXT: store i32 [[ADD13]], i32* [[RES]], align 4, !dbg [[DBG233]] -// CHECK-TLS3-NEXT: [[TMP26:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG234:![0-9]+]] -// CHECK-TLS3-NEXT: ret i32 [[TMP26]], !dbg [[DBG235:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP6:%.*]] = call %struct.Smain* @llvm.threadlocal.address.p0s_struct.Smains(%struct.Smain* @_ZZ4mainE2sm), !dbg [[DBG214:![0-9]+]] +// CHECK-TLS3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[TMP6]], i32 0, i32 0, !dbg [[DBG215:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP7:%.*]] = load i32, i32* [[A2]], align 8, !dbg [[DBG215]] +// CHECK-TLS3-NEXT: [[TMP8:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG216:![0-9]+]] +// CHECK-TLS3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP7]], !dbg [[DBG216]] +// CHECK-TLS3-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG216]] +// CHECK-TLS3-NEXT: [[TMP9:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG217:![0-9]+]] +// CHECK-TLS3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP9]], i32 0, i32 0, !dbg [[DBG218:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP10:%.*]] = load i32, i32* [[A3]], align 4, !dbg [[DBG218]] +// CHECK-TLS3-NEXT: [[TMP11:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG219:![0-9]+]] +// CHECK-TLS3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP10]], !dbg [[DBG219]] +// CHECK-TLS3-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG219]] +// CHECK-TLS3-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG220:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP13:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG221:![0-9]+]] +// CHECK-TLS3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP12]], !dbg [[DBG221]] +// CHECK-TLS3-NEXT: store i32 [[ADD5]], i32* [[RES]], align 4, !dbg [[DBG221]] +// CHECK-TLS3-NEXT: [[TMP14:%.*]] = call %struct.S5* @_ZTW3gs3(), !dbg [[DBG222:![0-9]+]] +// CHECK-TLS3-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP14]], i32 0, i32 0, !dbg [[DBG223:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP15:%.*]] = load i32, i32* [[A6]], align 4, !dbg [[DBG223]] +// CHECK-TLS3-NEXT: [[TMP16:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG224:![0-9]+]] +// CHECK-TLS3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], [[TMP15]], !dbg [[DBG224]] +// CHECK-TLS3-NEXT: store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG224]] +// CHECK-TLS3-NEXT: [[TMP17:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x(), !dbg [[DBG225:![0-9]+]] +// CHECK-TLS3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP17]], i64 0, i64 1, !dbg [[DBG225]] +// CHECK-TLS3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG225]] +// CHECK-TLS3-NEXT: [[A9:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX8]], i32 0, i32 0, !dbg [[DBG226:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP18:%.*]] = load i32, i32* [[A9]], align 4, !dbg [[DBG226]] +// CHECK-TLS3-NEXT: [[TMP19:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG227:![0-9]+]] +// CHECK-TLS3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], [[TMP18]], !dbg [[DBG227]] +// CHECK-TLS3-NEXT: store i32 [[ADD10]], i32* [[RES]], align 4, !dbg [[DBG227]] +// CHECK-TLS3-NEXT: [[TMP20:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN2STIiE2stE), !dbg [[DBG228:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4, !dbg [[DBG228]] +// CHECK-TLS3-NEXT: [[TMP22:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG229:![0-9]+]] +// CHECK-TLS3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP21]], !dbg [[DBG229]] +// CHECK-TLS3-NEXT: store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG229]] +// CHECK-TLS3-NEXT: [[TMP23:%.*]] = call float* @llvm.threadlocal.address.p0f32(float* @_ZN2STIfE2stE), !dbg [[DBG230:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP24:%.*]] = load float, float* [[TMP23]], align 4, !dbg [[DBG230]] +// CHECK-TLS3-NEXT: [[CONV:%.*]] = fptosi float [[TMP24]] to i32, !dbg [[DBG230]] +// CHECK-TLS3-NEXT: [[TMP25:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG231:![0-9]+]] +// CHECK-TLS3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP25]], [[CONV]], !dbg [[DBG231]] +// CHECK-TLS3-NEXT: store i32 [[ADD12]], i32* [[RES]], align 4, !dbg [[DBG231]] +// CHECK-TLS3-NEXT: [[TMP26:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE(), !dbg [[DBG232:![0-9]+]] +// CHECK-TLS3-NEXT: [[A13:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP26]], i32 0, i32 0, !dbg [[DBG233:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP27:%.*]] = load i32, i32* [[A13]], align 4, !dbg [[DBG233]] +// CHECK-TLS3-NEXT: [[TMP28:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG234:![0-9]+]] +// CHECK-TLS3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP28]], [[TMP27]], !dbg [[DBG234]] +// CHECK-TLS3-NEXT: store i32 [[ADD14]], i32* [[RES]], align 4, !dbg [[DBG234]] +// CHECK-TLS3-NEXT: [[TMP29:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG235:![0-9]+]] +// CHECK-TLS3-NEXT: ret i32 [[TMP29]], !dbg [[DBG236:![0-9]+]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZTWL3gs1 // CHECK-TLS3-SAME: () #[[ATTR6:[0-9]+]] { // CHECK-TLS3-NEXT: call void @_ZTHL3gs1() -// CHECK-TLS3-NEXT: ret %struct.S1* @_ZL3gs1 +// CHECK-TLS3-NEXT: [[TMP1:%.*]] = call %struct.S1* @llvm.threadlocal.address.p0s_struct.S1s(%struct.S1* @_ZL3gs1) +// CHECK-TLS3-NEXT: ret %struct.S1* [[TMP1]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei -// CHECK-TLS3-SAME: (%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 !dbg [[DBG236:![0-9]+]] { +// CHECK-TLS3-SAME: (%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 !dbg [[DBG237:![0-9]+]] { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8 // CHECK-TLS3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS3-NEXT: store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META237:![0-9]+]], metadata !DIExpression()), !dbg [[DBG239:![0-9]+]] +// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META238:![0-9]+]], metadata !DIExpression()), !dbg [[DBG240:![0-9]+]] // CHECK-TLS3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META240:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]] +// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META241:![0-9]+]], metadata !DIExpression()), !dbg [[DBG242:![0-9]+]] // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG242:![0-9]+]] -// CHECK-TLS3-NEXT: call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG242]] -// CHECK-TLS3-NEXT: ret void, !dbg [[DBG243:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG243:![0-9]+]] +// CHECK-TLS3-NEXT: call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG243]] +// CHECK-TLS3-NEXT: ret void, !dbg [[DBG244:![0-9]+]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev -// CHECK-TLS3-SAME: (%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG244:![0-9]+]] { +// CHECK-TLS3-SAME: (%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG245:![0-9]+]] { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8 // CHECK-TLS3-NEXT: store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META245:![0-9]+]], metadata !DIExpression()), !dbg [[DBG246:![0-9]+]] +// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META246:![0-9]+]], metadata !DIExpression()), !dbg [[DBG247:![0-9]+]] // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR3]], !dbg [[DBG247:![0-9]+]] -// CHECK-TLS3-NEXT: ret void, !dbg [[DBG248:![0-9]+]] +// CHECK-TLS3-NEXT: call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR3]], !dbg [[DBG248:![0-9]+]] +// CHECK-TLS3-NEXT: ret void, !dbg [[DBG249:![0-9]+]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZTWN6Static1sE @@ -4800,7 +4825,8 @@ // CHECK-TLS3-NEXT: call void @_ZTHN6Static1sE() // CHECK-TLS3-NEXT: br label [[TMP2]] // CHECK-TLS3: 2: -// CHECK-TLS3-NEXT: ret %struct.S3* @_ZN6Static1sE +// CHECK-TLS3-NEXT: [[TMP3:%.*]] = call %struct.S3* @llvm.threadlocal.address.p0s_struct.S3s(%struct.S3* @_ZN6Static1sE) +// CHECK-TLS3-NEXT: ret %struct.S3* [[TMP3]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZTW3gs3 @@ -4810,188 +4836,193 @@ // CHECK-TLS3-NEXT: call void @_ZTH3gs3() // CHECK-TLS3-NEXT: br label [[TMP2]] // CHECK-TLS3: 2: -// CHECK-TLS3-NEXT: ret %struct.S5* @gs3 +// CHECK-TLS3-NEXT: [[TMP3:%.*]] = call %struct.S5* @llvm.threadlocal.address.p0s_struct.S5s(%struct.S5* @gs3) +// CHECK-TLS3-NEXT: ret %struct.S5* [[TMP3]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZTW5arr_x // CHECK-TLS3-SAME: () #[[ATTR6]] comdat { // CHECK-TLS3-NEXT: call void @_ZTH5arr_x() -// CHECK-TLS3-NEXT: ret [2 x [3 x %struct.S1]]* @arr_x +// CHECK-TLS3-NEXT: [[TMP1:%.*]] = call [2 x [3 x %struct.S1]]* @llvm.threadlocal.address.p0a2a3s_struct.S1s([2 x [3 x %struct.S1]]* @arr_x) +// CHECK-TLS3-NEXT: ret [2 x [3 x %struct.S1]]* [[TMP1]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZTWN2STI2S4E2stE // CHECK-TLS3-SAME: () #[[ATTR6]] comdat { // CHECK-TLS3-NEXT: call void @_ZTHN2STI2S4E2stE() -// CHECK-TLS3-NEXT: ret %struct.S4* @_ZN2STI2S4E2stE +// CHECK-TLS3-NEXT: [[TMP1:%.*]] = call %struct.S4* @llvm.threadlocal.address.p0s_struct.S4s(%struct.S4* @_ZN2STI2S4E2stE) +// CHECK-TLS3-NEXT: ret %struct.S4* [[TMP1]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei -// CHECK-TLS3-SAME: (%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG249:![0-9]+]] { +// CHECK-TLS3-SAME: (%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG250:![0-9]+]] { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8 // CHECK-TLS3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS3-NEXT: store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META250:![0-9]+]], metadata !DIExpression()), !dbg [[DBG251:![0-9]+]] +// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META251:![0-9]+]], metadata !DIExpression()), !dbg [[DBG252:![0-9]+]] // CHECK-TLS3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META252:![0-9]+]], metadata !DIExpression()), !dbg [[DBG253:![0-9]+]] +// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META253:![0-9]+]], metadata !DIExpression()), !dbg [[DBG254:![0-9]+]] // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG254:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG255:![0-9]+]] -// CHECK-TLS3-NEXT: store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG254]] -// CHECK-TLS3-NEXT: ret void, !dbg [[DBG256:![0-9]+]] +// CHECK-TLS3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG255:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG256:![0-9]+]] +// CHECK-TLS3-NEXT: store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG255]] +// CHECK-TLS3-NEXT: ret void, !dbg [[DBG257:![0-9]+]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev -// CHECK-TLS3-SAME: (%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG257:![0-9]+]] { +// CHECK-TLS3-SAME: (%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG258:![0-9]+]] { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8 // CHECK-TLS3-NEXT: store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META258:![0-9]+]], metadata !DIExpression()), !dbg [[DBG259:![0-9]+]] +// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META259:![0-9]+]], metadata !DIExpression()), !dbg [[DBG260:![0-9]+]] // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG260:![0-9]+]] -// CHECK-TLS3-NEXT: store i32 0, i32* [[A]], align 8, !dbg [[DBG262:![0-9]+]] -// CHECK-TLS3-NEXT: ret void, !dbg [[DBG263:![0-9]+]] +// CHECK-TLS3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG261:![0-9]+]] +// CHECK-TLS3-NEXT: store i32 0, i32* [[A]], align 8, !dbg [[DBG263:![0-9]+]] +// CHECK-TLS3-NEXT: ret void, !dbg [[DBG264:![0-9]+]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_Z6foobarv -// CHECK-TLS3-SAME: () #[[ATTR7:[0-9]+]] !dbg [[DBG264:![0-9]+]] { +// CHECK-TLS3-SAME: () #[[ATTR7:[0-9]+]] !dbg [[DBG265:![0-9]+]] { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[RES:%.*]] = alloca i32, align 4 -// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META265:![0-9]+]], metadata !DIExpression()), !dbg [[DBG266:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP0:%.*]] = call %struct.S3* @_ZTWN6Static1sE(), !dbg [[DBG267:![0-9]+]] -// CHECK-TLS3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP0]], i32 0, i32 0, !dbg [[DBG268:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG268]] -// CHECK-TLS3-NEXT: store i32 [[TMP1]], i32* [[RES]], align 4, !dbg [[DBG269:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP2:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG270:![0-9]+]] -// CHECK-TLS3-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0, !dbg [[DBG271:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG271]] -// CHECK-TLS3-NEXT: [[TMP4:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG272:![0-9]+]] -// CHECK-TLS3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP3]], !dbg [[DBG272]] -// CHECK-TLS3-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG272]] -// CHECK-TLS3-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG273:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG274:![0-9]+]] -// CHECK-TLS3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG274]] -// CHECK-TLS3-NEXT: store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG274]] -// CHECK-TLS3-NEXT: [[TMP7:%.*]] = call %struct.S5* @_ZTW3gs3(), !dbg [[DBG275:![0-9]+]] -// CHECK-TLS3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP7]], i32 0, i32 0, !dbg [[DBG276:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A3]], align 4, !dbg [[DBG276]] -// CHECK-TLS3-NEXT: [[TMP9:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG277:![0-9]+]] -// CHECK-TLS3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP8]], !dbg [[DBG277]] -// CHECK-TLS3-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG277]] -// CHECK-TLS3-NEXT: [[TMP10:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x(), !dbg [[DBG278:![0-9]+]] -// CHECK-TLS3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP10]], i64 0, i64 1, !dbg [[DBG278]] -// CHECK-TLS3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG278]] -// CHECK-TLS3-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX5]], i32 0, i32 0, !dbg [[DBG279:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP11:%.*]] = load i32, i32* [[A6]], align 4, !dbg [[DBG279]] -// CHECK-TLS3-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG280:![0-9]+]] -// CHECK-TLS3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG280]] -// CHECK-TLS3-NEXT: store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG280]] -// CHECK-TLS3-NEXT: [[TMP13:%.*]] = load i32, i32* @_ZN2STIiE2stE, align 4, !dbg [[DBG281:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP14:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG282:![0-9]+]] -// CHECK-TLS3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP14]], [[TMP13]], !dbg [[DBG282]] -// CHECK-TLS3-NEXT: store i32 [[ADD8]], i32* [[RES]], align 4, !dbg [[DBG282]] -// CHECK-TLS3-NEXT: [[TMP15:%.*]] = load float, float* @_ZN2STIfE2stE, align 4, !dbg [[DBG283:![0-9]+]] -// CHECK-TLS3-NEXT: [[CONV:%.*]] = fptosi float [[TMP15]] to i32, !dbg [[DBG283]] -// CHECK-TLS3-NEXT: [[TMP16:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG284:![0-9]+]] -// CHECK-TLS3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], [[CONV]], !dbg [[DBG284]] -// CHECK-TLS3-NEXT: store i32 [[ADD9]], i32* [[RES]], align 4, !dbg [[DBG284]] -// CHECK-TLS3-NEXT: [[TMP17:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE(), !dbg [[DBG285:![0-9]+]] -// CHECK-TLS3-NEXT: [[A10:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP17]], i32 0, i32 0, !dbg [[DBG286:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP18:%.*]] = load i32, i32* [[A10]], align 4, !dbg [[DBG286]] -// CHECK-TLS3-NEXT: [[TMP19:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG287:![0-9]+]] -// CHECK-TLS3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP19]], [[TMP18]], !dbg [[DBG287]] -// CHECK-TLS3-NEXT: store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG287]] -// CHECK-TLS3-NEXT: [[TMP20:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG288:![0-9]+]] -// CHECK-TLS3-NEXT: ret i32 [[TMP20]], !dbg [[DBG289:![0-9]+]] +// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META266:![0-9]+]], metadata !DIExpression()), !dbg [[DBG267:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP0:%.*]] = call %struct.S3* @_ZTWN6Static1sE(), !dbg [[DBG268:![0-9]+]] +// CHECK-TLS3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP0]], i32 0, i32 0, !dbg [[DBG269:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG269]] +// CHECK-TLS3-NEXT: store i32 [[TMP1]], i32* [[RES]], align 4, !dbg [[DBG270:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP2:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG271:![0-9]+]] +// CHECK-TLS3-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0, !dbg [[DBG272:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG272]] +// CHECK-TLS3-NEXT: [[TMP4:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG273:![0-9]+]] +// CHECK-TLS3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP3]], !dbg [[DBG273]] +// CHECK-TLS3-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG273]] +// CHECK-TLS3-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG274:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG275:![0-9]+]] +// CHECK-TLS3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG275]] +// CHECK-TLS3-NEXT: store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG275]] +// CHECK-TLS3-NEXT: [[TMP7:%.*]] = call %struct.S5* @_ZTW3gs3(), !dbg [[DBG276:![0-9]+]] +// CHECK-TLS3-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP7]], i32 0, i32 0, !dbg [[DBG277:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A3]], align 4, !dbg [[DBG277]] +// CHECK-TLS3-NEXT: [[TMP9:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG278:![0-9]+]] +// CHECK-TLS3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP8]], !dbg [[DBG278]] +// CHECK-TLS3-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG278]] +// CHECK-TLS3-NEXT: [[TMP10:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x(), !dbg [[DBG279:![0-9]+]] +// CHECK-TLS3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP10]], i64 0, i64 1, !dbg [[DBG279]] +// CHECK-TLS3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG279]] +// CHECK-TLS3-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX5]], i32 0, i32 0, !dbg [[DBG280:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP11:%.*]] = load i32, i32* [[A6]], align 4, !dbg [[DBG280]] +// CHECK-TLS3-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG281:![0-9]+]] +// CHECK-TLS3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG281]] +// CHECK-TLS3-NEXT: store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG281]] +// CHECK-TLS3-NEXT: [[TMP13:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN2STIiE2stE), !dbg [[DBG282:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4, !dbg [[DBG282]] +// CHECK-TLS3-NEXT: [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG283:![0-9]+]] +// CHECK-TLS3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], [[TMP14]], !dbg [[DBG283]] +// CHECK-TLS3-NEXT: store i32 [[ADD8]], i32* [[RES]], align 4, !dbg [[DBG283]] +// CHECK-TLS3-NEXT: [[TMP16:%.*]] = call float* @llvm.threadlocal.address.p0f32(float* @_ZN2STIfE2stE), !dbg [[DBG284:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP17:%.*]] = load float, float* [[TMP16]], align 4, !dbg [[DBG284]] +// CHECK-TLS3-NEXT: [[CONV:%.*]] = fptosi float [[TMP17]] to i32, !dbg [[DBG284]] +// CHECK-TLS3-NEXT: [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG285:![0-9]+]] +// CHECK-TLS3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[CONV]], !dbg [[DBG285]] +// CHECK-TLS3-NEXT: store i32 [[ADD9]], i32* [[RES]], align 4, !dbg [[DBG285]] +// CHECK-TLS3-NEXT: [[TMP19:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE(), !dbg [[DBG286:![0-9]+]] +// CHECK-TLS3-NEXT: [[A10:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP19]], i32 0, i32 0, !dbg [[DBG287:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP20:%.*]] = load i32, i32* [[A10]], align 4, !dbg [[DBG287]] +// CHECK-TLS3-NEXT: [[TMP21:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG288:![0-9]+]] +// CHECK-TLS3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], [[TMP20]], !dbg [[DBG288]] +// CHECK-TLS3-NEXT: store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG288]] +// CHECK-TLS3-NEXT: [[TMP22:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG289:![0-9]+]] +// CHECK-TLS3-NEXT: ret i32 [[TMP22]], !dbg [[DBG290:![0-9]+]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@__cxx_global_var_init.3 -// CHECK-TLS3-SAME: () #[[ATTR0]] !dbg [[DBG290:![0-9]+]] { +// CHECK-TLS3-SAME: () #[[ATTR0]] !dbg [[DBG291:![0-9]+]] { // CHECK-TLS3-NEXT: entry: -// CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG291:![0-9]+]] -// CHECK-TLS3-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG291]] -// CHECK-TLS3-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG291]] +// CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG292:![0-9]+]] +// CHECK-TLS3-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG292]] +// CHECK-TLS3-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG292]] // CHECK-TLS3: init.check: -// CHECK-TLS3-NEXT: call void @_ZN2S4C1Ei(%struct.S4* noundef nonnull align 4 dereferenceable(8) @_ZN2STI2S4E2stE, i32 noundef 23), !dbg [[DBG292:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP1:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG291]] -// CHECK-TLS3-NEXT: store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG291]] -// CHECK-TLS3-NEXT: br label [[INIT_END]], !dbg [[DBG291]] +// CHECK-TLS3-NEXT: call void @_ZN2S4C1Ei(%struct.S4* noundef nonnull align 4 dereferenceable(8) @_ZN2STI2S4E2stE, i32 noundef 23), !dbg [[DBG293:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP1:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR3]], !dbg [[DBG292]] +// CHECK-TLS3-NEXT: store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG292]] +// CHECK-TLS3-NEXT: br label [[INIT_END]], !dbg [[DBG292]] // CHECK-TLS3: init.end: -// CHECK-TLS3-NEXT: ret void, !dbg [[DBG294:![0-9]+]] +// CHECK-TLS3-NEXT: ret void, !dbg [[DBG295:![0-9]+]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZN2S4C1Ei -// CHECK-TLS3-SAME: (%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG295:![0-9]+]] { +// CHECK-TLS3-SAME: (%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG296:![0-9]+]] { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8 // CHECK-TLS3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS3-NEXT: store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META296:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298:![0-9]+]] +// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META297:![0-9]+]], metadata !DIExpression()), !dbg [[DBG299:![0-9]+]] // CHECK-TLS3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300:![0-9]+]] +// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META300:![0-9]+]], metadata !DIExpression()), !dbg [[DBG301:![0-9]+]] // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG301:![0-9]+]] -// CHECK-TLS3-NEXT: call void @_ZN2S4C2Ei(%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG301]] -// CHECK-TLS3-NEXT: ret void, !dbg [[DBG302:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG302:![0-9]+]] +// CHECK-TLS3-NEXT: call void @_ZN2S4C2Ei(%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG302]] +// CHECK-TLS3-NEXT: ret void, !dbg [[DBG303:![0-9]+]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZN2S4D1Ev -// CHECK-TLS3-SAME: (%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG303:![0-9]+]] { +// CHECK-TLS3-SAME: (%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG304:![0-9]+]] { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8 // CHECK-TLS3-NEXT: store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META304:![0-9]+]], metadata !DIExpression()), !dbg [[DBG305:![0-9]+]] +// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG306:![0-9]+]] // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: call void @_ZN2S4D2Ev(%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR3]], !dbg [[DBG306:![0-9]+]] -// CHECK-TLS3-NEXT: ret void, !dbg [[DBG307:![0-9]+]] +// CHECK-TLS3-NEXT: call void @_ZN2S4D2Ev(%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR3]], !dbg [[DBG307:![0-9]+]] +// CHECK-TLS3-NEXT: ret void, !dbg [[DBG308:![0-9]+]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZN2S4C2Ei -// CHECK-TLS3-SAME: (%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG308:![0-9]+]] { +// CHECK-TLS3-SAME: (%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG309:![0-9]+]] { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8 // CHECK-TLS3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS3-NEXT: store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META309:![0-9]+]], metadata !DIExpression()), !dbg [[DBG310:![0-9]+]] +// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META310:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311:![0-9]+]] // CHECK-TLS3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META311:![0-9]+]], metadata !DIExpression()), !dbg [[DBG312:![0-9]+]] +// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META312:![0-9]+]], metadata !DIExpression()), !dbg [[DBG313:![0-9]+]] // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG313:![0-9]+]] -// CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG314:![0-9]+]] -// CHECK-TLS3-NEXT: store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG313]] -// CHECK-TLS3-NEXT: ret void, !dbg [[DBG315:![0-9]+]] +// CHECK-TLS3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG314:![0-9]+]] +// CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG315:![0-9]+]] +// CHECK-TLS3-NEXT: store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG314]] +// CHECK-TLS3-NEXT: ret void, !dbg [[DBG316:![0-9]+]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZN2S4D2Ev -// CHECK-TLS3-SAME: (%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG316:![0-9]+]] { +// CHECK-TLS3-SAME: (%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG317:![0-9]+]] { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8 // CHECK-TLS3-NEXT: store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META317:![0-9]+]], metadata !DIExpression()), !dbg [[DBG318:![0-9]+]] +// CHECK-TLS3-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META318:![0-9]+]], metadata !DIExpression()), !dbg [[DBG319:![0-9]+]] // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG319:![0-9]+]] -// CHECK-TLS3-NEXT: store i32 0, i32* [[A]], align 4, !dbg [[DBG321:![0-9]+]] -// CHECK-TLS3-NEXT: ret void, !dbg [[DBG322:![0-9]+]] +// CHECK-TLS3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG320:![0-9]+]] +// CHECK-TLS3-NEXT: store i32 0, i32* [[A]], align 4, !dbg [[DBG322:![0-9]+]] +// CHECK-TLS3-NEXT: ret void, !dbg [[DBG323:![0-9]+]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_threadprivate_codegen.cpp -// CHECK-TLS3-SAME: () #[[ATTR0]] !dbg [[DBG323:![0-9]+]] { +// CHECK-TLS3-SAME: () #[[ATTR0]] !dbg [[DBG324:![0-9]+]] { // CHECK-TLS3-NEXT: entry: -// CHECK-TLS3-NEXT: call void @__cxx_global_var_init.1(), !dbg [[DBG325:![0-9]+]] +// CHECK-TLS3-NEXT: call void @__cxx_global_var_init.1(), !dbg [[DBG326:![0-9]+]] // CHECK-TLS3-NEXT: ret void // // // CHECK-TLS3-LABEL: define {{[^@]+}}@__tls_init -// CHECK-TLS3-SAME: () #[[ATTR0]] !dbg [[DBG326:![0-9]+]] { +// CHECK-TLS3-SAME: () #[[ATTR0]] !dbg [[DBG327:![0-9]+]] { // CHECK-TLS3-NEXT: entry: -// CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i8, i8* @__tls_guard, align 1, !dbg [[DBG327:![0-9]+]] -// CHECK-TLS3-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG327]] -// CHECK-TLS3-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !dbg [[DBG327]], !prof [[PROF207]] +// CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i8, i8* @__tls_guard, align 1, !dbg [[DBG328:![0-9]+]] +// CHECK-TLS3-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG328]] +// CHECK-TLS3-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !dbg [[DBG328]], !prof [[PROF207]] // CHECK-TLS3: init: -// CHECK-TLS3-NEXT: store i8 1, i8* @__tls_guard, align 1, !dbg [[DBG327]] -// CHECK-TLS3-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG327]] -// CHECK-TLS3-NEXT: call void @__cxx_global_var_init.2(), !dbg [[DBG327]] -// CHECK-TLS3-NEXT: br label [[EXIT]], !dbg [[DBG327]] +// CHECK-TLS3-NEXT: store i8 1, i8* @__tls_guard, align 1, !dbg [[DBG328]] +// CHECK-TLS3-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG328]] +// CHECK-TLS3-NEXT: call void @__cxx_global_var_init.2(), !dbg [[DBG328]] +// CHECK-TLS3-NEXT: br label [[EXIT]], !dbg [[DBG328]] // CHECK-TLS3: exit: // CHECK-TLS3-NEXT: ret void // @@ -5019,85 +5050,88 @@ // CHECK-TLS4-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP4]], i32 0, i32 0, !dbg [[DBG124:![0-9]+]] // CHECK-TLS4-NEXT: [[TMP5:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG124]] // CHECK-TLS4-NEXT: store i32 [[TMP5]], i32* [[RES]], align 4, !dbg [[DBG125:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_SMAIN:%.*]], %struct.Smain* @_ZZ4mainE2sm, i32 0, i32 0), align 8, !dbg [[DBG126:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP7:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG127:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], [[TMP6]], !dbg [[DBG127]] -// CHECK-TLS4-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG127]] -// CHECK-TLS4-NEXT: [[TMP8:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG128:![0-9]+]] -// CHECK-TLS4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP8]], i32 0, i32 0, !dbg [[DBG129:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP9:%.*]] = load i32, i32* [[A2]], align 4, !dbg [[DBG129]] -// CHECK-TLS4-NEXT: [[TMP10:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG130:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], [[TMP9]], !dbg [[DBG130]] -// CHECK-TLS4-NEXT: store i32 [[ADD3]], i32* [[RES]], align 4, !dbg [[DBG130]] -// CHECK-TLS4-NEXT: [[TMP11:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG131:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG132:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG132]] -// CHECK-TLS4-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG132]] -// CHECK-TLS4-NEXT: [[TMP13:%.*]] = call %struct.S5* @_ZTW3gs3(), !dbg [[DBG133:![0-9]+]] -// CHECK-TLS4-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP13]], i32 0, i32 0, !dbg [[DBG134:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP14:%.*]] = load i32, i32* [[A5]], align 4, !dbg [[DBG134]] -// CHECK-TLS4-NEXT: [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG135:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP15]], [[TMP14]], !dbg [[DBG135]] -// CHECK-TLS4-NEXT: store i32 [[ADD6]], i32* [[RES]], align 4, !dbg [[DBG135]] -// CHECK-TLS4-NEXT: [[TMP16:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x(), !dbg [[DBG136:![0-9]+]] -// CHECK-TLS4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP16]], i64 0, i64 1, !dbg [[DBG136]] -// CHECK-TLS4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG136]] -// CHECK-TLS4-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX7]], i32 0, i32 0, !dbg [[DBG137:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP17:%.*]] = load i32, i32* [[A8]], align 4, !dbg [[DBG137]] -// CHECK-TLS4-NEXT: [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG138:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[TMP17]], !dbg [[DBG138]] -// CHECK-TLS4-NEXT: store i32 [[ADD9]], i32* [[RES]], align 4, !dbg [[DBG138]] -// CHECK-TLS4-NEXT: [[TMP19:%.*]] = call i32* @_ZTWN2STIiE2stE(), !dbg [[DBG139:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4, !dbg [[DBG139]] -// CHECK-TLS4-NEXT: [[TMP21:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG140:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP21]], [[TMP20]], !dbg [[DBG140]] -// CHECK-TLS4-NEXT: store i32 [[ADD10]], i32* [[RES]], align 4, !dbg [[DBG140]] -// CHECK-TLS4-NEXT: [[TMP22:%.*]] = call float* @_ZTWN2STIfE2stE(), !dbg [[DBG141:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP23:%.*]] = load float, float* [[TMP22]], align 4, !dbg [[DBG141]] -// CHECK-TLS4-NEXT: [[CONV:%.*]] = fptosi float [[TMP23]] to i32, !dbg [[DBG141]] -// CHECK-TLS4-NEXT: [[TMP24:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG142:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP24]], [[CONV]], !dbg [[DBG142]] -// CHECK-TLS4-NEXT: store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG142]] -// CHECK-TLS4-NEXT: [[TMP25:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE(), !dbg [[DBG143:![0-9]+]] -// CHECK-TLS4-NEXT: [[A12:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP25]], i32 0, i32 0, !dbg [[DBG144:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP26:%.*]] = load i32, i32* [[A12]], align 4, !dbg [[DBG144]] -// CHECK-TLS4-NEXT: [[TMP27:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG145:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP27]], [[TMP26]], !dbg [[DBG145]] -// CHECK-TLS4-NEXT: store i32 [[ADD13]], i32* [[RES]], align 4, !dbg [[DBG145]] +// CHECK-TLS4-NEXT: [[TMP6:%.*]] = call %struct.Smain* @llvm.threadlocal.address.p0s_struct.Smains(%struct.Smain* @_ZZ4mainE2sm), !dbg [[DBG126:![0-9]+]] +// CHECK-TLS4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[TMP6]], i32 0, i32 0, !dbg [[DBG127:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP7:%.*]] = load i32, i32* [[A2]], align 8, !dbg [[DBG127]] +// CHECK-TLS4-NEXT: [[TMP8:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG128:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], [[TMP7]], !dbg [[DBG128]] +// CHECK-TLS4-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG128]] +// CHECK-TLS4-NEXT: [[TMP9:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG129:![0-9]+]] +// CHECK-TLS4-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP9]], i32 0, i32 0, !dbg [[DBG130:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP10:%.*]] = load i32, i32* [[A3]], align 4, !dbg [[DBG130]] +// CHECK-TLS4-NEXT: [[TMP11:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG131:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP10]], !dbg [[DBG131]] +// CHECK-TLS4-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG131]] +// CHECK-TLS4-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG132:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP13:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG133:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP13]], [[TMP12]], !dbg [[DBG133]] +// CHECK-TLS4-NEXT: store i32 [[ADD5]], i32* [[RES]], align 4, !dbg [[DBG133]] +// CHECK-TLS4-NEXT: [[TMP14:%.*]] = call %struct.S5* @_ZTW3gs3(), !dbg [[DBG134:![0-9]+]] +// CHECK-TLS4-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP14]], i32 0, i32 0, !dbg [[DBG135:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP15:%.*]] = load i32, i32* [[A6]], align 4, !dbg [[DBG135]] +// CHECK-TLS4-NEXT: [[TMP16:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG136:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], [[TMP15]], !dbg [[DBG136]] +// CHECK-TLS4-NEXT: store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG136]] +// CHECK-TLS4-NEXT: [[TMP17:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x(), !dbg [[DBG137:![0-9]+]] +// CHECK-TLS4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP17]], i64 0, i64 1, !dbg [[DBG137]] +// CHECK-TLS4-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG137]] +// CHECK-TLS4-NEXT: [[A9:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX8]], i32 0, i32 0, !dbg [[DBG138:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP18:%.*]] = load i32, i32* [[A9]], align 4, !dbg [[DBG138]] +// CHECK-TLS4-NEXT: [[TMP19:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG139:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], [[TMP18]], !dbg [[DBG139]] +// CHECK-TLS4-NEXT: store i32 [[ADD10]], i32* [[RES]], align 4, !dbg [[DBG139]] +// CHECK-TLS4-NEXT: [[TMP20:%.*]] = call i32* @_ZTWN2STIiE2stE(), !dbg [[DBG140:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4, !dbg [[DBG140]] +// CHECK-TLS4-NEXT: [[TMP22:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG141:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP22]], [[TMP21]], !dbg [[DBG141]] +// CHECK-TLS4-NEXT: store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG141]] +// CHECK-TLS4-NEXT: [[TMP23:%.*]] = call float* @_ZTWN2STIfE2stE(), !dbg [[DBG142:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP24:%.*]] = load float, float* [[TMP23]], align 4, !dbg [[DBG142]] +// CHECK-TLS4-NEXT: [[CONV:%.*]] = fptosi float [[TMP24]] to i32, !dbg [[DBG142]] +// CHECK-TLS4-NEXT: [[TMP25:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG143:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP25]], [[CONV]], !dbg [[DBG143]] +// CHECK-TLS4-NEXT: store i32 [[ADD12]], i32* [[RES]], align 4, !dbg [[DBG143]] +// CHECK-TLS4-NEXT: [[TMP26:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE(), !dbg [[DBG144:![0-9]+]] +// CHECK-TLS4-NEXT: [[A13:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP26]], i32 0, i32 0, !dbg [[DBG145:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP27:%.*]] = load i32, i32* [[A13]], align 4, !dbg [[DBG145]] // CHECK-TLS4-NEXT: [[TMP28:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG146:![0-9]+]] -// CHECK-TLS4-NEXT: ret i32 [[TMP28]], !dbg [[DBG147:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP28]], [[TMP27]], !dbg [[DBG146]] +// CHECK-TLS4-NEXT: store i32 [[ADD14]], i32* [[RES]], align 4, !dbg [[DBG146]] +// CHECK-TLS4-NEXT: [[TMP29:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG147:![0-9]+]] +// CHECK-TLS4-NEXT: ret i32 [[TMP29]], !dbg [[DBG148:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWL3gs1 // CHECK-TLS4-SAME: () #[[ATTR2:[0-9]+]] { // CHECK-TLS4-NEXT: call void @_ZTHL3gs1() -// CHECK-TLS4-NEXT: ret %struct.S1* @_ZL3gs1 +// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call %struct.S1* @llvm.threadlocal.address.p0s_struct.S1s(%struct.S1* @_ZL3gs1) +// CHECK-TLS4-NEXT: ret %struct.S1* [[TMP1]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei -// CHECK-TLS4-SAME: (%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] align 2 !dbg [[DBG148:![0-9]+]] { +// CHECK-TLS4-SAME: (%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] align 2 !dbg [[DBG149:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8 // CHECK-TLS4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META149:![0-9]+]], metadata !DIExpression()), !dbg [[DBG151:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META150:![0-9]+]], metadata !DIExpression()), !dbg [[DBG152:![0-9]+]] // CHECK-TLS4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META152:![0-9]+]], metadata !DIExpression()), !dbg [[DBG153:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META153:![0-9]+]], metadata !DIExpression()), !dbg [[DBG154:![0-9]+]] // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG154:![0-9]+]] -// CHECK-TLS4-NEXT: call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG154]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG155:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG155:![0-9]+]] +// CHECK-TLS4-NEXT: call void @_ZZ4mainEN5SmainC2Ei(%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG155]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG156:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev -// CHECK-TLS4-SAME: (%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR4:[0-9]+]] align 2 !dbg [[DBG156:![0-9]+]] { +// CHECK-TLS4-SAME: (%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR4:[0-9]+]] align 2 !dbg [[DBG157:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8 // CHECK-TLS4-NEXT: store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG158:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META158:![0-9]+]], metadata !DIExpression()), !dbg [[DBG159:![0-9]+]] // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR5]], !dbg [[DBG159:![0-9]+]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG160:![0-9]+]] +// CHECK-TLS4-NEXT: call void @_ZZ4mainEN5SmainD2Ev(%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR5]], !dbg [[DBG160:![0-9]+]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG161:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWN6Static1sE @@ -5107,7 +5141,8 @@ // CHECK-TLS4-NEXT: call void @_ZTHN6Static1sE() // CHECK-TLS4-NEXT: br label [[TMP2]] // CHECK-TLS4: 2: -// CHECK-TLS4-NEXT: ret %struct.S3* @_ZN6Static1sE +// CHECK-TLS4-NEXT: [[TMP3:%.*]] = call %struct.S3* @llvm.threadlocal.address.p0s_struct.S3s(%struct.S3* @_ZN6Static1sE) +// CHECK-TLS4-NEXT: ret %struct.S3* [[TMP3]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTW3gs3 @@ -5117,434 +5152,439 @@ // CHECK-TLS4-NEXT: call void @_ZTH3gs3() // CHECK-TLS4-NEXT: br label [[TMP2]] // CHECK-TLS4: 2: -// CHECK-TLS4-NEXT: ret %struct.S5* @gs3 +// CHECK-TLS4-NEXT: [[TMP3:%.*]] = call %struct.S5* @llvm.threadlocal.address.p0s_struct.S5s(%struct.S5* @gs3) +// CHECK-TLS4-NEXT: ret %struct.S5* [[TMP3]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTW5arr_x // CHECK-TLS4-SAME: () #[[ATTR2]] comdat { // CHECK-TLS4-NEXT: call void @_ZTH5arr_x() -// CHECK-TLS4-NEXT: ret [2 x [3 x %struct.S1]]* @arr_x +// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call [2 x [3 x %struct.S1]]* @llvm.threadlocal.address.p0a2a3s_struct.S1s([2 x [3 x %struct.S1]]* @arr_x) +// CHECK-TLS4-NEXT: ret [2 x [3 x %struct.S1]]* [[TMP1]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWN2STIiE2stE // CHECK-TLS4-SAME: () #[[ATTR2]] comdat { -// CHECK-TLS4-NEXT: ret i32* @_ZN2STIiE2stE +// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call i32* @llvm.threadlocal.address.p0i32(i32* @_ZN2STIiE2stE) +// CHECK-TLS4-NEXT: ret i32* [[TMP1]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWN2STIfE2stE // CHECK-TLS4-SAME: () #[[ATTR2]] comdat { -// CHECK-TLS4-NEXT: ret float* @_ZN2STIfE2stE +// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call float* @llvm.threadlocal.address.p0f32(float* @_ZN2STIfE2stE) +// CHECK-TLS4-NEXT: ret float* [[TMP1]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWN2STI2S4E2stE // CHECK-TLS4-SAME: () #[[ATTR2]] comdat { // CHECK-TLS4-NEXT: call void @_ZTHN2STI2S4E2stE() -// CHECK-TLS4-NEXT: ret %struct.S4* @_ZN2STI2S4E2stE +// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call %struct.S4* @llvm.threadlocal.address.p0s_struct.S4s(%struct.S4* @_ZN2STI2S4E2stE) +// CHECK-TLS4-NEXT: ret %struct.S4* [[TMP1]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_Z6foobarv -// CHECK-TLS4-SAME: () #[[ATTR6:[0-9]+]] !dbg [[DBG161:![0-9]+]] { +// CHECK-TLS4-SAME: () #[[ATTR6:[0-9]+]] !dbg [[DBG162:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[RES:%.*]] = alloca i32, align 4 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META162:![0-9]+]], metadata !DIExpression()), !dbg [[DBG163:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = call %struct.S3* @_ZTWN6Static1sE(), !dbg [[DBG164:![0-9]+]] -// CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP0]], i32 0, i32 0, !dbg [[DBG165:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG165]] -// CHECK-TLS4-NEXT: store i32 [[TMP1]], i32* [[RES]], align 4, !dbg [[DBG166:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP2:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG167:![0-9]+]] -// CHECK-TLS4-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0, !dbg [[DBG168:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP3:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG168]] -// CHECK-TLS4-NEXT: [[TMP4:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG169:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP3]], !dbg [[DBG169]] -// CHECK-TLS4-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG169]] -// CHECK-TLS4-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG170:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG171:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG171]] -// CHECK-TLS4-NEXT: store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG171]] -// CHECK-TLS4-NEXT: [[TMP7:%.*]] = call %struct.S5* @_ZTW3gs3(), !dbg [[DBG172:![0-9]+]] -// CHECK-TLS4-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP7]], i32 0, i32 0, !dbg [[DBG173:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP8:%.*]] = load i32, i32* [[A3]], align 4, !dbg [[DBG173]] -// CHECK-TLS4-NEXT: [[TMP9:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG174:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP8]], !dbg [[DBG174]] -// CHECK-TLS4-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG174]] -// CHECK-TLS4-NEXT: [[TMP10:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x(), !dbg [[DBG175:![0-9]+]] -// CHECK-TLS4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP10]], i64 0, i64 1, !dbg [[DBG175]] -// CHECK-TLS4-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG175]] -// CHECK-TLS4-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX5]], i32 0, i32 0, !dbg [[DBG176:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP11:%.*]] = load i32, i32* [[A6]], align 4, !dbg [[DBG176]] -// CHECK-TLS4-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG177:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG177]] -// CHECK-TLS4-NEXT: store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG177]] -// CHECK-TLS4-NEXT: [[TMP13:%.*]] = call i32* @_ZTWN2STIiE2stE(), !dbg [[DBG178:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4, !dbg [[DBG178]] -// CHECK-TLS4-NEXT: [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG179:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], [[TMP14]], !dbg [[DBG179]] -// CHECK-TLS4-NEXT: store i32 [[ADD8]], i32* [[RES]], align 4, !dbg [[DBG179]] -// CHECK-TLS4-NEXT: [[TMP16:%.*]] = call float* @_ZTWN2STIfE2stE(), !dbg [[DBG180:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP17:%.*]] = load float, float* [[TMP16]], align 4, !dbg [[DBG180]] -// CHECK-TLS4-NEXT: [[CONV:%.*]] = fptosi float [[TMP17]] to i32, !dbg [[DBG180]] -// CHECK-TLS4-NEXT: [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG181:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[CONV]], !dbg [[DBG181]] -// CHECK-TLS4-NEXT: store i32 [[ADD9]], i32* [[RES]], align 4, !dbg [[DBG181]] -// CHECK-TLS4-NEXT: [[TMP19:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE(), !dbg [[DBG182:![0-9]+]] -// CHECK-TLS4-NEXT: [[A10:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP19]], i32 0, i32 0, !dbg [[DBG183:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP20:%.*]] = load i32, i32* [[A10]], align 4, !dbg [[DBG183]] -// CHECK-TLS4-NEXT: [[TMP21:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG184:![0-9]+]] -// CHECK-TLS4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], [[TMP20]], !dbg [[DBG184]] -// CHECK-TLS4-NEXT: store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG184]] -// CHECK-TLS4-NEXT: [[TMP22:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG185:![0-9]+]] -// CHECK-TLS4-NEXT: ret i32 [[TMP22]], !dbg [[DBG186:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[RES]], metadata [[META163:![0-9]+]], metadata !DIExpression()), !dbg [[DBG164:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = call %struct.S3* @_ZTWN6Static1sE(), !dbg [[DBG165:![0-9]+]] +// CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], %struct.S3* [[TMP0]], i32 0, i32 0, !dbg [[DBG166:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4, !dbg [[DBG166]] +// CHECK-TLS4-NEXT: store i32 [[TMP1]], i32* [[RES]], align 4, !dbg [[DBG167:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP2:%.*]] = call %struct.S1* @_ZTWL3gs1(), !dbg [[DBG168:![0-9]+]] +// CHECK-TLS4-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP2]], i32 0, i32 0, !dbg [[DBG169:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP3:%.*]] = load i32, i32* [[A1]], align 4, !dbg [[DBG169]] +// CHECK-TLS4-NEXT: [[TMP4:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG170:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], [[TMP3]], !dbg [[DBG170]] +// CHECK-TLS4-NEXT: store i32 [[ADD]], i32* [[RES]], align 4, !dbg [[DBG170]] +// CHECK-TLS4-NEXT: [[TMP5:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S2:%.*]], %struct.S2* @_ZL3gs2, i32 0, i32 0), align 8, !dbg [[DBG171:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP6:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG172:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], [[TMP5]], !dbg [[DBG172]] +// CHECK-TLS4-NEXT: store i32 [[ADD2]], i32* [[RES]], align 4, !dbg [[DBG172]] +// CHECK-TLS4-NEXT: [[TMP7:%.*]] = call %struct.S5* @_ZTW3gs3(), !dbg [[DBG173:![0-9]+]] +// CHECK-TLS4-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S5:%.*]], %struct.S5* [[TMP7]], i32 0, i32 0, !dbg [[DBG174:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP8:%.*]] = load i32, i32* [[A3]], align 4, !dbg [[DBG174]] +// CHECK-TLS4-NEXT: [[TMP9:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG175:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], [[TMP8]], !dbg [[DBG175]] +// CHECK-TLS4-NEXT: store i32 [[ADD4]], i32* [[RES]], align 4, !dbg [[DBG175]] +// CHECK-TLS4-NEXT: [[TMP10:%.*]] = call [2 x [3 x %struct.S1]]* @_ZTW5arr_x(), !dbg [[DBG176:![0-9]+]] +// CHECK-TLS4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* [[TMP10]], i64 0, i64 1, !dbg [[DBG176]] +// CHECK-TLS4-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG176]] +// CHECK-TLS4-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYIDX5]], i32 0, i32 0, !dbg [[DBG177:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP11:%.*]] = load i32, i32* [[A6]], align 4, !dbg [[DBG177]] +// CHECK-TLS4-NEXT: [[TMP12:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG178:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], [[TMP11]], !dbg [[DBG178]] +// CHECK-TLS4-NEXT: store i32 [[ADD7]], i32* [[RES]], align 4, !dbg [[DBG178]] +// CHECK-TLS4-NEXT: [[TMP13:%.*]] = call i32* @_ZTWN2STIiE2stE(), !dbg [[DBG179:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4, !dbg [[DBG179]] +// CHECK-TLS4-NEXT: [[TMP15:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG180:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], [[TMP14]], !dbg [[DBG180]] +// CHECK-TLS4-NEXT: store i32 [[ADD8]], i32* [[RES]], align 4, !dbg [[DBG180]] +// CHECK-TLS4-NEXT: [[TMP16:%.*]] = call float* @_ZTWN2STIfE2stE(), !dbg [[DBG181:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP17:%.*]] = load float, float* [[TMP16]], align 4, !dbg [[DBG181]] +// CHECK-TLS4-NEXT: [[CONV:%.*]] = fptosi float [[TMP17]] to i32, !dbg [[DBG181]] +// CHECK-TLS4-NEXT: [[TMP18:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG182:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP18]], [[CONV]], !dbg [[DBG182]] +// CHECK-TLS4-NEXT: store i32 [[ADD9]], i32* [[RES]], align 4, !dbg [[DBG182]] +// CHECK-TLS4-NEXT: [[TMP19:%.*]] = call %struct.S4* @_ZTWN2STI2S4E2stE(), !dbg [[DBG183:![0-9]+]] +// CHECK-TLS4-NEXT: [[A10:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[TMP19]], i32 0, i32 0, !dbg [[DBG184:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP20:%.*]] = load i32, i32* [[A10]], align 4, !dbg [[DBG184]] +// CHECK-TLS4-NEXT: [[TMP21:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG185:![0-9]+]] +// CHECK-TLS4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP21]], [[TMP20]], !dbg [[DBG185]] +// CHECK-TLS4-NEXT: store i32 [[ADD11]], i32* [[RES]], align 4, !dbg [[DBG185]] +// CHECK-TLS4-NEXT: [[TMP22:%.*]] = load i32, i32* [[RES]], align 4, !dbg [[DBG186:![0-9]+]] +// CHECK-TLS4-NEXT: ret i32 [[TMP22]], !dbg [[DBG187:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@__cxx_global_var_init -// CHECK-TLS4-SAME: () #[[ATTR7:[0-9]+]] !dbg [[DBG187:![0-9]+]] { +// CHECK-TLS4-SAME: () #[[ATTR7:[0-9]+]] !dbg [[DBG188:![0-9]+]] { // CHECK-TLS4-NEXT: entry: -// CHECK-TLS4-NEXT: call void @_ZN2S1C1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(4) @_ZL3gs1, i32 noundef 5), !dbg [[DBG191:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR5]], !dbg [[DBG193:![0-9]+]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG194:![0-9]+]] +// CHECK-TLS4-NEXT: call void @_ZN2S1C1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(4) @_ZL3gs1, i32 noundef 5), !dbg [[DBG192:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S1*)* @_ZN2S1D1Ev to void (i8*)*), i8* bitcast (%struct.S1* @_ZL3gs1 to i8*), i8* @__dso_handle) #[[ATTR5]], !dbg [[DBG194:![0-9]+]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG195:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S1C1Ei -// CHECK-TLS4-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG195:![0-9]+]] { +// CHECK-TLS4-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG196:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // CHECK-TLS4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG198:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META197:![0-9]+]], metadata !DIExpression()), !dbg [[DBG199:![0-9]+]] // CHECK-TLS4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META199:![0-9]+]], metadata !DIExpression()), !dbg [[DBG200:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG201:![0-9]+]] // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG201:![0-9]+]] -// CHECK-TLS4-NEXT: call void @_ZN2S1C2Ei(%struct.S1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG201]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG202:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG202:![0-9]+]] +// CHECK-TLS4-NEXT: call void @_ZN2S1C2Ei(%struct.S1* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG202]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG203:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S1D1Ev -// CHECK-TLS4-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG203:![0-9]+]] { +// CHECK-TLS4-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG204:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // CHECK-TLS4-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META204:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META205:![0-9]+]], metadata !DIExpression()), !dbg [[DBG206:![0-9]+]] // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @_ZN2S1D2Ev(%struct.S1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]], !dbg [[DBG206:![0-9]+]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG207:![0-9]+]] +// CHECK-TLS4-NEXT: call void @_ZN2S1D2Ev(%struct.S1* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]], !dbg [[DBG207:![0-9]+]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG208:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S1C2Ei -// CHECK-TLS4-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG208:![0-9]+]] { +// CHECK-TLS4-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG209:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // CHECK-TLS4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META209:![0-9]+]], metadata !DIExpression()), !dbg [[DBG210:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META210:![0-9]+]], metadata !DIExpression()), !dbg [[DBG211:![0-9]+]] // CHECK-TLS4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META211:![0-9]+]], metadata !DIExpression()), !dbg [[DBG212:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META212:![0-9]+]], metadata !DIExpression()), !dbg [[DBG213:![0-9]+]] // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG213:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG214:![0-9]+]] -// CHECK-TLS4-NEXT: store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG213]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG215:![0-9]+]] +// CHECK-TLS4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG214:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG215:![0-9]+]] +// CHECK-TLS4-NEXT: store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG214]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG216:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S1D2Ev -// CHECK-TLS4-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG216:![0-9]+]] { +// CHECK-TLS4-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG217:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 // CHECK-TLS4-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META217:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S1** [[THIS_ADDR]], metadata [[META218:![0-9]+]], metadata !DIExpression()), !dbg [[DBG219:![0-9]+]] // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG219:![0-9]+]] -// CHECK-TLS4-NEXT: store i32 0, i32* [[A]], align 4, !dbg [[DBG221:![0-9]+]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG222:![0-9]+]] +// CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0, !dbg [[DBG220:![0-9]+]] +// CHECK-TLS4-NEXT: store i32 0, i32* [[A]], align 4, !dbg [[DBG222:![0-9]+]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG223:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@__cxx_global_var_init.1 -// CHECK-TLS4-SAME: () #[[ATTR7]] !dbg [[DBG223:![0-9]+]] { +// CHECK-TLS4-SAME: () #[[ATTR7]] !dbg [[DBG224:![0-9]+]] { // CHECK-TLS4-NEXT: entry: -// CHECK-TLS4-NEXT: call void @_ZN2S2C1Ei(%struct.S2* noundef nonnull align 8 dereferenceable(16) @_ZL3gs2, i32 noundef 27), !dbg [[DBG224:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR5]], !dbg [[DBG226:![0-9]+]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG227:![0-9]+]] +// CHECK-TLS4-NEXT: call void @_ZN2S2C1Ei(%struct.S2* noundef nonnull align 8 dereferenceable(16) @_ZL3gs2, i32 noundef 27), !dbg [[DBG225:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%struct.S2*)* @_ZN2S2D1Ev to void (i8*)*), i8* bitcast (%struct.S2* @_ZL3gs2 to i8*), i8* @__dso_handle) #[[ATTR5]], !dbg [[DBG227:![0-9]+]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG228:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S2C1Ei -// CHECK-TLS4-SAME: (%struct.S2* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG228:![0-9]+]] { +// CHECK-TLS4-SAME: (%struct.S2* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG229:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8 // CHECK-TLS4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META229:![0-9]+]], metadata !DIExpression()), !dbg [[DBG231:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META230:![0-9]+]], metadata !DIExpression()), !dbg [[DBG232:![0-9]+]] // CHECK-TLS4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG233:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META233:![0-9]+]], metadata !DIExpression()), !dbg [[DBG234:![0-9]+]] // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG234:![0-9]+]] -// CHECK-TLS4-NEXT: call void @_ZN2S2C2Ei(%struct.S2* noundef nonnull align 8 dereferenceable(16) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG234]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG235:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG235:![0-9]+]] +// CHECK-TLS4-NEXT: call void @_ZN2S2C2Ei(%struct.S2* noundef nonnull align 8 dereferenceable(16) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG235]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG236:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S2D1Ev -// CHECK-TLS4-SAME: (%struct.S2* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG236:![0-9]+]] { +// CHECK-TLS4-SAME: (%struct.S2* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG237:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8 // CHECK-TLS4-NEXT: store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META237:![0-9]+]], metadata !DIExpression()), !dbg [[DBG238:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META238:![0-9]+]], metadata !DIExpression()), !dbg [[DBG239:![0-9]+]] // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @_ZN2S2D2Ev(%struct.S2* noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR5]], !dbg [[DBG239:![0-9]+]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG240:![0-9]+]] +// CHECK-TLS4-NEXT: call void @_ZN2S2D2Ev(%struct.S2* noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR5]], !dbg [[DBG240:![0-9]+]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG241:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S2C2Ei -// CHECK-TLS4-SAME: (%struct.S2* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG241:![0-9]+]] { +// CHECK-TLS4-SAME: (%struct.S2* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG242:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8 // CHECK-TLS4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META242:![0-9]+]], metadata !DIExpression()), !dbg [[DBG243:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META243:![0-9]+]], metadata !DIExpression()), !dbg [[DBG244:![0-9]+]] // CHECK-TLS4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META244:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META245:![0-9]+]], metadata !DIExpression()), !dbg [[DBG246:![0-9]+]] // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG246:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG247:![0-9]+]] -// CHECK-TLS4-NEXT: store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG246]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG248:![0-9]+]] +// CHECK-TLS4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG247:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG248:![0-9]+]] +// CHECK-TLS4-NEXT: store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG247]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG249:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S2D2Ev -// CHECK-TLS4-SAME: (%struct.S2* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG249:![0-9]+]] { +// CHECK-TLS4-SAME: (%struct.S2* noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG250:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S2*, align 8 // CHECK-TLS4-NEXT: store %struct.S2* [[THIS]], %struct.S2** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META250:![0-9]+]], metadata !DIExpression()), !dbg [[DBG251:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S2** [[THIS_ADDR]], metadata [[META251:![0-9]+]], metadata !DIExpression()), !dbg [[DBG252:![0-9]+]] // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load %struct.S2*, %struct.S2** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG252:![0-9]+]] -// CHECK-TLS4-NEXT: store i32 0, i32* [[A]], align 8, !dbg [[DBG254:![0-9]+]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG255:![0-9]+]] +// CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], %struct.S2* [[THIS1]], i32 0, i32 0, !dbg [[DBG253:![0-9]+]] +// CHECK-TLS4-NEXT: store i32 0, i32* [[A]], align 8, !dbg [[DBG255:![0-9]+]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG256:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@__cxx_global_var_init.2 -// CHECK-TLS4-SAME: () #[[ATTR7]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG256:![0-9]+]] { +// CHECK-TLS4-SAME: () #[[ATTR7]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG257:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[ARRAYINIT_ENDOFINIT:%.*]] = alloca [3 x %struct.S1]*, align 8 // CHECK-TLS4-NEXT: [[ARRAYINIT_ENDOFINIT1:%.*]] = alloca %struct.S1*, align 8 // CHECK-TLS4-NEXT: [[EXN_SLOT:%.*]] = alloca i8*, align 8 // CHECK-TLS4-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: [[ARRAYINIT_ENDOFINIT5:%.*]] = alloca %struct.S1*, align 8 -// CHECK-TLS4-NEXT: store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG257:![0-9]+]] -// CHECK-TLS4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG259:![0-9]+]] +// CHECK-TLS4-NEXT: store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG258:![0-9]+]] +// CHECK-TLS4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG260:![0-9]+]] // CHECK-TLS4-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), i32 noundef 1) -// CHECK-TLS4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG260:![0-9]+]] +// CHECK-TLS4-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG261:![0-9]+]] // CHECK-TLS4: invoke.cont: -// CHECK-TLS4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG259]] +// CHECK-TLS4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG260]] // CHECK-TLS4-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 1), i32 noundef 2) -// CHECK-TLS4-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]], !dbg [[DBG261:![0-9]+]] +// CHECK-TLS4-NEXT: to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]], !dbg [[DBG262:![0-9]+]] // CHECK-TLS4: invoke.cont2: -// CHECK-TLS4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG259]] +// CHECK-TLS4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG260]] // CHECK-TLS4-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 2), i32 noundef 3) -// CHECK-TLS4-NEXT: to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]], !dbg [[DBG262:![0-9]+]] +// CHECK-TLS4-NEXT: to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]], !dbg [[DBG263:![0-9]+]] // CHECK-TLS4: invoke.cont3: -// CHECK-TLS4-NEXT: store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG257]] -// CHECK-TLS4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG263:![0-9]+]] +// CHECK-TLS4-NEXT: store [3 x %struct.S1]* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1), [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG258]] +// CHECK-TLS4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG264:![0-9]+]] // CHECK-TLS4-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), i32 noundef 4) -// CHECK-TLS4-NEXT: to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD6:%.*]], !dbg [[DBG264:![0-9]+]] +// CHECK-TLS4-NEXT: to label [[INVOKE_CONT7:%.*]] unwind label [[LPAD6:%.*]], !dbg [[DBG265:![0-9]+]] // CHECK-TLS4: invoke.cont7: -// CHECK-TLS4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG263]] +// CHECK-TLS4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG264]] // CHECK-TLS4-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 1), i32 noundef 5) -// CHECK-TLS4-NEXT: to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD6]], !dbg [[DBG265:![0-9]+]] +// CHECK-TLS4-NEXT: to label [[INVOKE_CONT8:%.*]] unwind label [[LPAD6]], !dbg [[DBG266:![0-9]+]] // CHECK-TLS4: invoke.cont8: -// CHECK-TLS4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG263]] +// CHECK-TLS4-NEXT: store %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG264]] // CHECK-TLS4-NEXT: invoke void @_ZN2S1C1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 2), i32 noundef 6) -// CHECK-TLS4-NEXT: to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]], !dbg [[DBG266:![0-9]+]] +// CHECK-TLS4-NEXT: to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]], !dbg [[DBG267:![0-9]+]] // CHECK-TLS4: invoke.cont9: -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR5]], !dbg [[DBG267:![0-9]+]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG267]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* @__cxx_global_array_dtor, i8* null, i8* @__dso_handle) #[[ATTR5]], !dbg [[DBG268:![0-9]+]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG268]] // CHECK-TLS4: lpad: // CHECK-TLS4-NEXT: [[TMP1:%.*]] = landingpad { i8*, i32 } -// CHECK-TLS4-NEXT: cleanup, !dbg [[DBG268:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0, !dbg [[DBG268]] -// CHECK-TLS4-NEXT: store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG268]] -// CHECK-TLS4-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1, !dbg [[DBG268]] -// CHECK-TLS4-NEXT: store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG268]] -// CHECK-TLS4-NEXT: [[TMP4:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG259]] -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[TMP4]], !dbg [[DBG259]] -// CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG259]] +// CHECK-TLS4-NEXT: cleanup, !dbg [[DBG269:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP2:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 0, !dbg [[DBG269]] +// CHECK-TLS4-NEXT: store i8* [[TMP2]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG269]] +// CHECK-TLS4-NEXT: [[TMP3:%.*]] = extractvalue { i8*, i32 } [[TMP1]], 1, !dbg [[DBG269]] +// CHECK-TLS4-NEXT: store i32 [[TMP3]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG269]] +// CHECK-TLS4-NEXT: [[TMP4:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG260]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[TMP4]], !dbg [[DBG260]] +// CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG260]] // CHECK-TLS4: arraydestroy.body: -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG259]] -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG259]] -// CHECK-TLS4-NEXT: call void @_ZN2S1D1Ev(%struct.S1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]], !dbg [[DBG259]] -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG259]] -// CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG259]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG260]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG260]] +// CHECK-TLS4-NEXT: call void @_ZN2S1D1Ev(%struct.S1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]], !dbg [[DBG260]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG260]] +// CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG260]] // CHECK-TLS4: arraydestroy.done4: -// CHECK-TLS4-NEXT: br label [[EHCLEANUP:%.*]], !dbg [[DBG259]] +// CHECK-TLS4-NEXT: br label [[EHCLEANUP:%.*]], !dbg [[DBG260]] // CHECK-TLS4: lpad6: // CHECK-TLS4-NEXT: [[TMP5:%.*]] = landingpad { i8*, i32 } -// CHECK-TLS4-NEXT: cleanup, !dbg [[DBG268]] -// CHECK-TLS4-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG268]] -// CHECK-TLS4-NEXT: store i8* [[TMP6]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG268]] -// CHECK-TLS4-NEXT: [[TMP7:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 1, !dbg [[DBG268]] -// CHECK-TLS4-NEXT: store i32 [[TMP7]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG268]] -// CHECK-TLS4-NEXT: [[TMP8:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG263]] -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ISEMPTY10:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), [[TMP8]], !dbg [[DBG263]] -// CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY10]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY11:%.*]], !dbg [[DBG263]] +// CHECK-TLS4-NEXT: cleanup, !dbg [[DBG269]] +// CHECK-TLS4-NEXT: [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG269]] +// CHECK-TLS4-NEXT: store i8* [[TMP6]], i8** [[EXN_SLOT]], align 8, !dbg [[DBG269]] +// CHECK-TLS4-NEXT: [[TMP7:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 1, !dbg [[DBG269]] +// CHECK-TLS4-NEXT: store i32 [[TMP7]], i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG269]] +// CHECK-TLS4-NEXT: [[TMP8:%.*]] = load %struct.S1*, %struct.S1** [[ARRAYINIT_ENDOFINIT5]], align 8, !dbg [[DBG264]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ISEMPTY10:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), [[TMP8]], !dbg [[DBG264]] +// CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY10]], label [[ARRAYDESTROY_DONE15:%.*]], label [[ARRAYDESTROY_BODY11:%.*]], !dbg [[DBG264]] // CHECK-TLS4: arraydestroy.body11: -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi %struct.S1* [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ], !dbg [[DBG263]] -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1, !dbg [[DBG263]] -// CHECK-TLS4-NEXT: call void @_ZN2S1D1Ev(%struct.S1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR5]], !dbg [[DBG263]] -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_DONE14:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), !dbg [[DBG263]] -// CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]], !dbg [[DBG263]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi %struct.S1* [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ], !dbg [[DBG264]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1, !dbg [[DBG264]] +// CHECK-TLS4-NEXT: call void @_ZN2S1D1Ev(%struct.S1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR5]], !dbg [[DBG264]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_DONE14:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 1, i64 0), !dbg [[DBG264]] +// CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]], !dbg [[DBG264]] // CHECK-TLS4: arraydestroy.done15: -// CHECK-TLS4-NEXT: br label [[EHCLEANUP]], !dbg [[DBG263]] +// CHECK-TLS4-NEXT: br label [[EHCLEANUP]], !dbg [[DBG264]] // CHECK-TLS4: ehcleanup: -// CHECK-TLS4-NEXT: [[TMP9:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG257]] -// CHECK-TLS4-NEXT: [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP9]], i64 0, i64 0, !dbg [[DBG257]] -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[PAD_ARRAYEND]], !dbg [[DBG257]] -// CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]], !dbg [[DBG257]] +// CHECK-TLS4-NEXT: [[TMP9:%.*]] = load [3 x %struct.S1]*, [3 x %struct.S1]** [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG258]] +// CHECK-TLS4-NEXT: [[PAD_ARRAYEND:%.*]] = getelementptr inbounds [3 x %struct.S1], [3 x %struct.S1]* [[TMP9]], i64 0, i64 0, !dbg [[DBG258]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ISEMPTY16:%.*]] = icmp eq %struct.S1* getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), [[PAD_ARRAYEND]], !dbg [[DBG258]] +// CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY16]], label [[ARRAYDESTROY_DONE21:%.*]], label [[ARRAYDESTROY_BODY17:%.*]], !dbg [[DBG258]] // CHECK-TLS4: arraydestroy.body17: -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG257]] -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG257]] -// CHECK-TLS4-NEXT: call void @_ZN2S1D1Ev(%struct.S1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR5]], !dbg [[DBG257]] -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG257]] -// CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]], !dbg [[DBG257]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi %struct.S1* [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG258]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG258]] +// CHECK-TLS4-NEXT: call void @_ZN2S1D1Ev(%struct.S1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR5]], !dbg [[DBG258]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_DONE20:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT19]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 0, i64 0, i64 0), !dbg [[DBG258]] +// CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]], !dbg [[DBG258]] // CHECK-TLS4: arraydestroy.done21: -// CHECK-TLS4-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG257]] +// CHECK-TLS4-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG258]] // CHECK-TLS4: eh.resume: -// CHECK-TLS4-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG257]] -// CHECK-TLS4-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG257]] -// CHECK-TLS4-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG257]] -// CHECK-TLS4-NEXT: [[LPAD_VAL22:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG257]] -// CHECK-TLS4-NEXT: resume { i8*, i32 } [[LPAD_VAL22]], !dbg [[DBG257]] +// CHECK-TLS4-NEXT: [[EXN:%.*]] = load i8*, i8** [[EXN_SLOT]], align 8, !dbg [[DBG258]] +// CHECK-TLS4-NEXT: [[SEL:%.*]] = load i32, i32* [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG258]] +// CHECK-TLS4-NEXT: [[LPAD_VAL:%.*]] = insertvalue { i8*, i32 } undef, i8* [[EXN]], 0, !dbg [[DBG258]] +// CHECK-TLS4-NEXT: [[LPAD_VAL22:%.*]] = insertvalue { i8*, i32 } [[LPAD_VAL]], i32 [[SEL]], 1, !dbg [[DBG258]] +// CHECK-TLS4-NEXT: resume { i8*, i32 } [[LPAD_VAL22]], !dbg [[DBG258]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@__cxx_global_array_dtor -// CHECK-TLS4-SAME: (i8* noundef [[TMP0:%.*]]) #[[ATTR7]] !dbg [[DBG269:![0-9]+]] { +// CHECK-TLS4-SAME: (i8* noundef [[TMP0:%.*]]) #[[ATTR7]] !dbg [[DBG270:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK-TLS4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META273:![0-9]+]], metadata !DIExpression()), !dbg [[DBG274:![0-9]+]] -// CHECK-TLS4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG274]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i8** [[DOTADDR]], metadata [[META274:![0-9]+]], metadata !DIExpression()), !dbg [[DBG275:![0-9]+]] +// CHECK-TLS4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG275]] // CHECK-TLS4: arraydestroy.body: -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 1, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG274]] -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG274]] -// CHECK-TLS4-NEXT: call void @_ZN2S1D1Ev(%struct.S1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]], !dbg [[DBG274]] -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), !dbg [[DBG274]] -// CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG274]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S1* [ getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i64 1, i64 0, i64 0), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG275]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG275]] +// CHECK-TLS4-NEXT: call void @_ZN2S1D1Ev(%struct.S1* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]], !dbg [[DBG275]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S1* [[ARRAYDESTROY_ELEMENT]], getelementptr inbounds ([2 x [3 x %struct.S1]], [2 x [3 x %struct.S1]]* @arr_x, i32 0, i32 0, i32 0), !dbg [[DBG275]] +// CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG275]] // CHECK-TLS4: arraydestroy.done1: -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG274]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG275]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei -// CHECK-TLS4-SAME: (%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR4]] align 2 !dbg [[DBG275:![0-9]+]] { +// CHECK-TLS4-SAME: (%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR4]] align 2 !dbg [[DBG276:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8 // CHECK-TLS4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG277:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META277:![0-9]+]], metadata !DIExpression()), !dbg [[DBG278:![0-9]+]] // CHECK-TLS4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META278:![0-9]+]], metadata !DIExpression()), !dbg [[DBG279:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META279:![0-9]+]], metadata !DIExpression()), !dbg [[DBG280:![0-9]+]] // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG280:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG281:![0-9]+]] -// CHECK-TLS4-NEXT: store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG280]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG282:![0-9]+]] +// CHECK-TLS4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG281:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG282:![0-9]+]] +// CHECK-TLS4-NEXT: store i32 [[TMP0]], i32* [[A2]], align 8, !dbg [[DBG281]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG283:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev -// CHECK-TLS4-SAME: (%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] align 2 !dbg [[DBG283:![0-9]+]] { +// CHECK-TLS4-SAME: (%struct.Smain* noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] align 2 !dbg [[DBG284:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.Smain*, align 8 // CHECK-TLS4-NEXT: store %struct.Smain* [[THIS]], %struct.Smain** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META284:![0-9]+]], metadata !DIExpression()), !dbg [[DBG285:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.Smain** [[THIS_ADDR]], metadata [[META285:![0-9]+]], metadata !DIExpression()), !dbg [[DBG286:![0-9]+]] // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load %struct.Smain*, %struct.Smain** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG286:![0-9]+]] -// CHECK-TLS4-NEXT: store i32 0, i32* [[A]], align 8, !dbg [[DBG288:![0-9]+]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG289:![0-9]+]] +// CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], %struct.Smain* [[THIS1]], i32 0, i32 0, !dbg [[DBG287:![0-9]+]] +// CHECK-TLS4-NEXT: store i32 0, i32* [[A]], align 8, !dbg [[DBG289:![0-9]+]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG290:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@__cxx_global_var_init.3 -// CHECK-TLS4-SAME: () #[[ATTR7]] !dbg [[DBG290:![0-9]+]] { +// CHECK-TLS4-SAME: () #[[ATTR7]] !dbg [[DBG291:![0-9]+]] { // CHECK-TLS4-NEXT: entry: -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG291:![0-9]+]] -// CHECK-TLS4-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG291]] -// CHECK-TLS4-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG291]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i8, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG292:![0-9]+]] +// CHECK-TLS4-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG292]] +// CHECK-TLS4-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG292]] // CHECK-TLS4: init.check: -// CHECK-TLS4-NEXT: call void @_ZN2S4C1Ei(%struct.S4* noundef nonnull align 4 dereferenceable(8) @_ZN2STI2S4E2stE, i32 noundef 23), !dbg [[DBG292:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR5]], !dbg [[DBG291]] -// CHECK-TLS4-NEXT: store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG291]] -// CHECK-TLS4-NEXT: br label [[INIT_END]], !dbg [[DBG291]] +// CHECK-TLS4-NEXT: call void @_ZN2S4C1Ei(%struct.S4* noundef nonnull align 4 dereferenceable(8) @_ZN2STI2S4E2stE, i32 noundef 23), !dbg [[DBG293:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call i32 @__cxa_thread_atexit(void (i8*)* bitcast (void (%struct.S4*)* @_ZN2S4D1Ev to void (i8*)*), i8* bitcast (%struct.S4* @_ZN2STI2S4E2stE to i8*), i8* @__dso_handle) #[[ATTR5]], !dbg [[DBG292]] +// CHECK-TLS4-NEXT: store i8 1, i8* bitcast (i64* @_ZGVN2STI2S4E2stE to i8*), align 8, !dbg [[DBG292]] +// CHECK-TLS4-NEXT: br label [[INIT_END]], !dbg [[DBG292]] // CHECK-TLS4: init.end: -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG294:![0-9]+]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG295:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S4C1Ei -// CHECK-TLS4-SAME: (%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG295:![0-9]+]] { +// CHECK-TLS4-SAME: (%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG296:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8 // CHECK-TLS4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META296:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META297:![0-9]+]], metadata !DIExpression()), !dbg [[DBG299:![0-9]+]] // CHECK-TLS4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META300:![0-9]+]], metadata !DIExpression()), !dbg [[DBG301:![0-9]+]] // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG301:![0-9]+]] -// CHECK-TLS4-NEXT: call void @_ZN2S4C2Ei(%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG301]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG302:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG302:![0-9]+]] +// CHECK-TLS4-NEXT: call void @_ZN2S4C2Ei(%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG302]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG303:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S4D1Ev -// CHECK-TLS4-SAME: (%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG303:![0-9]+]] { +// CHECK-TLS4-SAME: (%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG304:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8 // CHECK-TLS4-NEXT: store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META304:![0-9]+]], metadata !DIExpression()), !dbg [[DBG305:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG306:![0-9]+]] // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @_ZN2S4D2Ev(%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR5]], !dbg [[DBG306:![0-9]+]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG307:![0-9]+]] +// CHECK-TLS4-NEXT: call void @_ZN2S4D2Ev(%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR5]], !dbg [[DBG307:![0-9]+]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG308:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S4C2Ei -// CHECK-TLS4-SAME: (%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG308:![0-9]+]] { +// CHECK-TLS4-SAME: (%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG309:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8 // CHECK-TLS4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META309:![0-9]+]], metadata !DIExpression()), !dbg [[DBG310:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META310:![0-9]+]], metadata !DIExpression()), !dbg [[DBG311:![0-9]+]] // CHECK-TLS4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META311:![0-9]+]], metadata !DIExpression()), !dbg [[DBG312:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META312:![0-9]+]], metadata !DIExpression()), !dbg [[DBG313:![0-9]+]] // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG313:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG314:![0-9]+]] -// CHECK-TLS4-NEXT: store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG313]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG315:![0-9]+]] +// CHECK-TLS4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG314:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG315:![0-9]+]] +// CHECK-TLS4-NEXT: store i32 [[TMP0]], i32* [[A2]], align 4, !dbg [[DBG314]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG316:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S4D2Ev -// CHECK-TLS4-SAME: (%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG316:![0-9]+]] { +// CHECK-TLS4-SAME: (%struct.S4* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG317:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S4*, align 8 // CHECK-TLS4-NEXT: store %struct.S4* [[THIS]], %struct.S4** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META317:![0-9]+]], metadata !DIExpression()), !dbg [[DBG318:![0-9]+]] +// CHECK-TLS4-NEXT: call void @llvm.dbg.declare(metadata %struct.S4** [[THIS_ADDR]], metadata [[META318:![0-9]+]], metadata !DIExpression()), !dbg [[DBG319:![0-9]+]] // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load %struct.S4*, %struct.S4** [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG319:![0-9]+]] -// CHECK-TLS4-NEXT: store i32 0, i32* [[A]], align 4, !dbg [[DBG321:![0-9]+]] -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG322:![0-9]+]] +// CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], %struct.S4* [[THIS1]], i32 0, i32 0, !dbg [[DBG320:![0-9]+]] +// CHECK-TLS4-NEXT: store i32 0, i32* [[A]], align 4, !dbg [[DBG322:![0-9]+]] +// CHECK-TLS4-NEXT: ret void, !dbg [[DBG323:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_GLOBAL__sub_I_threadprivate_codegen.cpp -// CHECK-TLS4-SAME: () #[[ATTR7]] !dbg [[DBG323:![0-9]+]] { +// CHECK-TLS4-SAME: () #[[ATTR7]] !dbg [[DBG324:![0-9]+]] { // CHECK-TLS4-NEXT: entry: -// CHECK-TLS4-NEXT: call void @__cxx_global_var_init.1(), !dbg [[DBG325:![0-9]+]] +// CHECK-TLS4-NEXT: call void @__cxx_global_var_init.1(), !dbg [[DBG326:![0-9]+]] // CHECK-TLS4-NEXT: ret void // // // CHECK-TLS4-LABEL: define {{[^@]+}}@__tls_init -// CHECK-TLS4-SAME: () #[[ATTR7]] !dbg [[DBG326:![0-9]+]] { +// CHECK-TLS4-SAME: () #[[ATTR7]] !dbg [[DBG327:![0-9]+]] { // CHECK-TLS4-NEXT: entry: -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i8, i8* @__tls_guard, align 1, !dbg [[DBG327:![0-9]+]] -// CHECK-TLS4-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG327]] -// CHECK-TLS4-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !dbg [[DBG327]], !prof [[PROF119]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i8, i8* @__tls_guard, align 1, !dbg [[DBG328:![0-9]+]] +// CHECK-TLS4-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG328]] +// CHECK-TLS4-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT:%.*]], label [[EXIT:%.*]], !dbg [[DBG328]], !prof [[PROF119]] // CHECK-TLS4: init: -// CHECK-TLS4-NEXT: store i8 1, i8* @__tls_guard, align 1, !dbg [[DBG327]] -// CHECK-TLS4-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG327]] -// CHECK-TLS4-NEXT: call void @__cxx_global_var_init.2(), !dbg [[DBG327]] -// CHECK-TLS4-NEXT: br label [[EXIT]], !dbg [[DBG327]] +// CHECK-TLS4-NEXT: store i8 1, i8* @__tls_guard, align 1, !dbg [[DBG328]] +// CHECK-TLS4-NEXT: call void @__cxx_global_var_init(), !dbg [[DBG328]] +// CHECK-TLS4-NEXT: call void @__cxx_global_var_init.2(), !dbg [[DBG328]] +// CHECK-TLS4-NEXT: br label [[EXIT]], !dbg [[DBG328]] // CHECK-TLS4: exit: // CHECK-TLS4-NEXT: ret void // Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -24528,6 +24528,37 @@ mask argument does not match the pointer size of the target, the mask is zero-extended or truncated accordingly. +.. _int_threadlocal_address: + +'``llvm.threadlocal.address``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare ptr @llvm.threadlocal.address(ptr) nounwind readnone willreturn + +Arguments: +"""""""""" + +The first argument is pointer, which refers to a thread local variable. + +Overview: +"""""""""" + +The LLVM treated the address of thread local variable as a constant expression. +But it is not true. The ``llvm.threadlocal.address`` intrinsic would represent +the address of the thread local variable. + +Semantics: +"""""""""" + +The address of a thread local variable is not a constant, since it depends on +the calling thread. The `llvm.threadlocal.address` intrinsic returns the +address of the given thread local variable in the calling thread. + .. _int_vscale: '``llvm.vscale``' Intrinsic Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -649,6 +649,7 @@ case Intrinsic::coro_align: case Intrinsic::coro_suspend: case Intrinsic::coro_subfn_addr: + case Intrinsic::threadlocal_address: // These intrinsics don't actually represent code after lowering. return 0; } Index: llvm/include/llvm/IR/IRBuilder.h =================================================================== --- llvm/include/llvm/IR/IRBuilder.h +++ llvm/include/llvm/IR/IRBuilder.h @@ -749,6 +749,9 @@ /// If the pointer isn't i8* it will be converted. CallInst *CreateInvariantStart(Value *Ptr, ConstantInt *Size = nullptr); + /// Create a call to llvm.threadlocal.address intrinsic. + CallInst *CreateThreadLocalAddress(Value *Ptr); + /// Create a call to Masked Load intrinsic CallInst *CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask, Value *PassThru = nullptr, const Twine &Name = ""); Index: llvm/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/include/llvm/IR/Intrinsics.td +++ llvm/include/llvm/IR/Intrinsics.td @@ -1402,6 +1402,10 @@ def int_ptrmask: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +// Intrinsic to wrap a thread local variable. +def int_threadlocal_address : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>], + [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + def int_experimental_stepvector : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [], [IntrNoMem]>; Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -7167,6 +7167,10 @@ DAG.getZExtOrTrunc(Const, sdl, PtrVT))); return; } + case Intrinsic::threadlocal_address: { + setValue(&I, getValue(I.getOperand(0))); + return; + } case Intrinsic::get_active_lane_mask: { EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Index = getValue(I.getOperand(0)); Index: llvm/lib/IR/IRBuilder.cpp =================================================================== --- llvm/lib/IR/IRBuilder.cpp +++ llvm/lib/IR/IRBuilder.cpp @@ -528,6 +528,19 @@ return createCallHelper(TheFn, Ops, this); } +CallInst *IRBuilderBase::CreateThreadLocalAddress(Value *Ptr) { + if (!isa(Ptr) || !cast(Ptr)->isThreadLocal()) { + llvm::outs() << "Non Global Value wanted.\n"; + Ptr->dump(); + } + assert(isa(Ptr) && cast(Ptr)->isThreadLocal() && + "threadlocal_address only applies to thread local variables."); + auto *C = CreateIntrinsic(llvm::Intrinsic::threadlocal_address, {Ptr->getType()}, + {Ptr}); + llvm::outs() << *C << "\n"; + return C; +} + CallInst * IRBuilderBase::CreateAssumption(Value *Cond, ArrayRef OpBundles) { Index: llvm/lib/IR/Verifier.cpp =================================================================== --- llvm/lib/IR/Verifier.cpp +++ llvm/lib/IR/Verifier.cpp @@ -5596,6 +5596,13 @@ &Call); break; } + case Intrinsic::threadlocal_address: { + GlobalValue *GV = dyn_cast(Call.getOperand(0)); + Check(GV && GV->isThreadLocal(), + "@llvm.threadlocal.address requires a threadlocal global variable.", + &Call); + break; + } }; } Index: llvm/test/CodeGen/X86/threadlocal_address.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/threadlocal_address.ll @@ -0,0 +1,41 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -stop-after=finalize-isel %s -o - | FileCheck %s + +@i = thread_local global i32 0, align 4 + +define noundef i32 @foo() { +; CHECK: %0:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gottpoff) @i, $noreg :: (load (s64) from got) +; CHECK: %1:gr32 = MOV32rm %0, 1, $noreg, 0, $fs :: (load (s32) from %ir.0) +; CHECK: %2:gr32 = nsw INC32r %1, implicit-def dead $eflags +; CHECK: MOV32mr %0, 1, $noreg, 0, $fs, %2 :: (store (s32) into %ir.0) +; CHECK: $eax = COPY %2 +; CHECK: RET 0, $eax +entry: + %0 = call ptr @llvm.threadlocal.address(ptr @i) + %1 = load i32, ptr %0, align 4 + %inc = add nsw i32 %1, 1 + store i32 %inc, ptr %0, align 4 + %2 = call ptr @llvm.threadlocal.address(ptr @i) + %3 = load i32, ptr %2, align 4 + ret i32 %3 +} + +@j = thread_local addrspace(1) global i32 addrspace(0)* @i, align 4 +define noundef i32 @bar() { +; CHECK: %0:gr64 = MOV64rm $rip, 1, $noreg, target-flags(x86-gottpoff) @j, $noreg :: (load (s64) from got) +; CHECK: %1:gr32 = MOV32rm %0, 1, $noreg, 0, $fs :: (load (s32) from %ir.0, addrspace 1) +; CHECK: %2:gr32 = nsw INC32r %1, implicit-def dead $eflags +; CHECK: MOV32mr %0, 1, $noreg, 0, $fs, %2 :: (store (s32) into %ir.0, addrspace 1) +; CHECK: $eax = COPY %2 +; CHECK: RET 0, $eax +entry: + %0 = call ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1) @j) + %1 = load i32, ptr addrspace(1) %0, align 4 + %inc = add nsw i32 %1, 1 + store i32 %inc, ptr addrspace(1) %0, align 4 + %2 = call ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1) @j) + %3 = load i32, ptr addrspace(1) %2, align 4 + ret i32 %3 +} + +declare ptr @llvm.threadlocal.address(ptr) nounwind readnone willreturn +declare ptr addrspace(1) @llvm.threadlocal.address.p1(ptr addrspace(1)) nounwind readnone willreturn Index: llvm/test/Verifier/threadlocal_address.ll =================================================================== --- /dev/null +++ llvm/test/Verifier/threadlocal_address.ll @@ -0,0 +1,13 @@ +; RUN: not opt -passes=verify -S < %s 2>&1 | FileCheck %s + +@i = global i32 0, align 4 + +define void @f(ptr %p) { + ; CHECK: @llvm.threadlocal.address requires a threadlocal global variable + %a = call ptr @llvm.threadlocal.address(ptr %p) + ; CHECK: @llvm.threadlocal.address requires a threadlocal global variable + %b = call ptr @llvm.threadlocal.address(ptr @i) + ret void +} + +declare ptr @llvm.threadlocal.address(ptr) nounwind readnone willreturn