Index: include/clang/Basic/LangOptions.def =================================================================== --- include/clang/Basic/LangOptions.def +++ include/clang/Basic/LangOptions.def @@ -161,6 +161,7 @@ LANGOPT(HalfArgsAndReturns, 1, 0, "half args and returns") LANGOPT(CUDA , 1, 0, "CUDA") LANGOPT(OpenMP , 1, 0, "OpenMP support") +LANGOPT(OpenMPUseTLS , 1, 0, "OpenMP can use TLS") LANGOPT(CUDAIsDevice , 1, 0, "Compiling for CUDA device") LANGOPT(CUDAAllowHostCallsFromHostDevice, 1, 0, "Allow host device functions to call host functions") LANGOPT(CUDADisableTargetCallChecks, 1, 0, "Disable checks for call targets (host, device, etc.)") Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -879,6 +879,7 @@ def fopenmp : Flag<["-"], "fopenmp">, Group, Flags<[CC1Option, NoArgumentUnused]>; def fno_openmp : Flag<["-"], "fno-openmp">, Group, Flags<[NoArgumentUnused]>; def fopenmp_EQ : Joined<["-"], "fopenmp=">, Group; +def fopenmp_use_tls : Flag<["-"], "fopenmp-use-tls">, Group, Flags<[CC1Option, NoArgumentUnused]>; def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group; def foptimize_sibling_calls : Flag<["-"], "foptimize-sibling-calls">, Group; def force__cpusubtype__ALL : Flag<["-"], "force_cpusubtype_ALL">; Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -349,6 +349,10 @@ /// llvm::Value *getCriticalRegionLock(StringRef CriticalName); + /// \brief Is set to true if TLS should be used for OpenMP thread private + //// variables. It is false by default. + bool UseTLSForThreadPrivate; + public: explicit CGOpenMPRuntime(CodeGenModule &CGM); virtual ~CGOpenMPRuntime() {} Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -267,6 +267,9 @@ llvm::PointerType::getUnqual(CGM.Int32Ty)}; Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); + + UseTLSForThreadPrivate = CGM.getLangOpts().OpenMPUseTLS && + CGM.getTarget().isTLSSupported(); } void CGOpenMPRuntime::clear() { @@ -900,6 +903,10 @@ llvm::Constant * CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { + + assert(!UseTLSForThreadPrivate && + "Cache is not required for thread private global!"); + // Lookup the entry, lazily creating it if necessary. return getOrCreateInternalVariable(CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)) + ".cache."); @@ -909,6 +916,14 @@ const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc) { + // If using TLS to implement OpenMP thread private, we only need to return + // the address of the global variable after assuring the declaration is marked + // thread local. + if (UseTLSForThreadPrivate) { + CGM.GetGlobalValue(CGM.getMangledName(VD))->setThreadLocal(true); + return CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy); + } + auto VarTy = VDAddr->getType()->getPointerElementType(); llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy), @@ -938,6 +953,16 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF) { + + // If it is profitable to use TLS for the current target, we should mark the + // privatized global declaration as thread local. + llvm::GlobalVariable *TLSGV = nullptr; + if (UseTLSForThreadPrivate) { + TLSGV = + cast(CGM.GetGlobalValue(CGM.getMangledName(VD))); + TLSGV->setThreadLocal(true); + } + VD = VD->getDefinition(CGM.getContext()); if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { ThreadPrivateWithDefinition.insert(VD); @@ -962,20 +987,28 @@ FTy, ".__kmpc_global_ctor_.", Loc); CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, Args, SourceLocation()); - auto ArgVal = CtorCGF.EmitLoadOfScalar( - CtorCGF.GetAddrOfLocalVar(&Dst), - /*Volatile=*/false, CGM.PointerAlignInBytes, - CGM.getContext().VoidPtrTy, Dst.getLocation()); - auto Arg = CtorCGF.Builder.CreatePointerCast( - ArgVal, - CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy))); - CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), + + llvm::Value *OrigVal = nullptr, *RetVal = nullptr; + + // If we are allowed to use TLS for thread private, we initialize and + // return the TLS global variable directly. Otherwise we need to load + // the pointer from the Ctor arguments. + if (TLSGV) { + RetVal = CtorCGF.Builder.CreatePointerCast(TLSGV, CtorCGF.VoidPtrTy); + OrigVal = TLSGV; + } else { + RetVal = CtorCGF.EmitLoadOfScalar( + CtorCGF.GetAddrOfLocalVar(&Dst), + /*Volatile=*/false, CGM.PointerAlignInBytes, + CGM.getContext().VoidPtrTy, Dst.getLocation()); + OrigVal = CtorCGF.Builder.CreatePointerCast( + RetVal, + CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy))); + } + + CtorCGF.EmitAnyExprToMem(Init, OrigVal, Init->getType().getQualifiers(), /*IsInitializer=*/true); - ArgVal = CtorCGF.EmitLoadOfScalar( - CtorCGF.GetAddrOfLocalVar(&Dst), - /*Volatile=*/false, CGM.PointerAlignInBytes, - CGM.getContext().VoidPtrTy, Dst.getLocation()); - CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); + CtorCGF.Builder.CreateStore(RetVal, CtorCGF.ReturnValue); CtorCGF.FinishFunction(); Ctor = Fn; } @@ -996,11 +1029,20 @@ FTy, ".__kmpc_global_dtor_.", Loc); DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, SourceLocation()); - auto ArgVal = DtorCGF.EmitLoadOfScalar( - DtorCGF.GetAddrOfLocalVar(&Dst), - /*Volatile=*/false, CGM.PointerAlignInBytes, - CGM.getContext().VoidPtrTy, Dst.getLocation()); - DtorCGF.emitDestroy(ArgVal, ASTTy, + + llvm::Value *OrigVal = nullptr; + + // If we are allowed to use TLS, use the TLS global variable, otherwise + // use the pointer passed to the Dtor. + if (TLSGV) { + OrigVal = DtorCGF.Builder.CreatePointerCast(TLSGV, DtorCGF.VoidPtrTy); + } else { + OrigVal = DtorCGF.EmitLoadOfScalar( + DtorCGF.GetAddrOfLocalVar(&Dst), + /*Volatile=*/false, CGM.PointerAlignInBytes, + CGM.getContext().VoidPtrTy, Dst.getLocation()); + } + DtorCGF.emitDestroy(OrigVal, ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); DtorCGF.FinishFunction(); Index: lib/Driver/Tools.cpp =================================================================== --- lib/Driver/Tools.cpp +++ lib/Driver/Tools.cpp @@ -3893,6 +3893,17 @@ case OMPRT_IOMP5: // Clang can generate useful OpenMP code for these two runtime libraries. CmdArgs.push_back("-fopenmp"); + + // Inform the frontend that TLS can be used during OpenMP code generation. + // This is the default for PowerPC machines. + if (Args.hasArg(options::OPT_fopenmp_use_tls)) + CmdArgs.push_back("-fopenmp-use-tls"); + else { + const llvm::Triple::ArchType Arch = getToolChain().getArch(); + if ((Arch == llvm::Triple::ppc || Arch == llvm::Triple::ppc64 || + Arch == llvm::Triple::ppc64le)) + CmdArgs.push_back("-fopenmp-use-tls"); + } break; default: // By default, if Clang doesn't know how to generate useful OpenMP code Index: lib/Frontend/CompilerInvocation.cpp =================================================================== --- lib/Frontend/CompilerInvocation.cpp +++ lib/Frontend/CompilerInvocation.cpp @@ -1648,6 +1648,9 @@ // Check if -fopenmp is specified. Opts.OpenMP = Args.hasArg(options::OPT_fopenmp); + // Check if TLS can be used in the OpenMP code generation. + Opts.OpenMPUseTLS = Args.hasArg(options::OPT_fopenmp_use_tls); + // Record whether the __DEPRECATED define was requested. Opts.Deprecated = Args.hasFlag(OPT_fdeprecated_macro, OPT_fno_deprecated_macro, Index: test/Driver/ppc-features.cpp =================================================================== --- test/Driver/ppc-features.cpp +++ test/Driver/ppc-features.cpp @@ -142,4 +142,8 @@ // RUN: %clang -target powerpc64le-unknown-linux-gnu %s -### -o %t.o 2>&1 | FileCheck -check-prefix=CHECK_LE_LD_ARGS %s // CHECK_LE_LD_ARGS: "elf64lppc" - +// OpenMP features +// RUN: %clang -target powerpc-unknown-linux-gnu %s -### -fopenmp=libomp -o %t.o 2>&1 | FileCheck -check-prefix=CHECK_OPENMP_TLS %s +// RUN: %clang -target powerpc64-unknown-linux-gnu %s -### -fopenmp=libomp -o %t.o 2>&1 | FileCheck -check-prefix=CHECK_OPENMP_TLS %s +// RUN: %clang -target powerpc64le-unknown-linux-gnu %s -### -fopenmp=libomp -o %t.o 2>&1 | FileCheck -check-prefix=CHECK_OPENMP_TLS %s +// CHECK_OPENMP_TLS: "-fopenmp-use-tls" Index: test/OpenMP/threadprivate_codegen.cpp =================================================================== --- test/OpenMP/threadprivate_codegen.cpp +++ test/OpenMP/threadprivate_codegen.cpp @@ -1,6 +1,13 @@ // RUN: %clang_cc1 -verify -fopenmp -DBODY -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp -DBODY -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -g -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK-DEBUG %s +// +// Same test as before but using a target that can use TLS to implement thread +// private. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-use-tls -DBODY -triple powerpc64le-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix=CHECK-TLS +// RUN: %clang_cc1 -fopenmp -fopenmp-use-tls -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-use-tls -DBODY -x c++ -triple powerpc64le-unknown-unknown -fexceptions -fcxx-exceptions -g -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK-TLS-DEBUG %s + // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -11,6 +18,13 @@ // CHECK-DAG: [[S4:%.+]] = type { [[INT]], [[INT]] } // CHECK-DAG: [[S5:%.+]] = type { [[INT]], [[INT]], [[INT]] } // CHECK-DAG: [[SMAIN:%.+]] = type { [[INT]], double, double } +// CHECK-TLS-DAG: [[IDENT:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-TLS-DAG: [[S1:%.+]] = type { [[INT:i[0-9]+]] } +// CHECK-TLS-DAG: [[S2:%.+]] = type { [[INT]], double } +// CHECK-TLS-DAG: [[S3:%.+]] = type { [[INT]], float } +// CHECK-TLS-DAG: [[S4:%.+]] = type { [[INT]], [[INT]] } +// CHECK-TLS-DAG: [[S5:%.+]] = type { [[INT]], [[INT]], [[INT]] } +// CHECK-TLS-DAG: [[SMAIN:%.+]] = type { [[INT]], double, double } // CHECK-DEBUG-DAG: [[IDENT:%.+]] = type { i32, i32, i32, i32, i8* } // CHECK-DEBUG-DAG: [[S1:%.+]] = type { [[INT:i[0-9]+]] } // CHECK-DEBUG-DAG: [[S2:%.+]] = type { [[INT]], double } @@ -18,6 +32,13 @@ // CHECK-DEBUG-DAG: [[S4:%.+]] = type { [[INT]], [[INT]] } // CHECK-DEBUG-DAG: [[S5:%.+]] = type { [[INT]], [[INT]], [[INT]] } // CHECK-DEBUG-DAG: [[SMAIN:%.+]] = type { [[INT]], double, double } +// CHECK-TLS-DEBUG-DAG: [[IDENT:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-TLS-DEBUG-DAG: [[S1:%.+]] = type { [[INT:i[0-9]+]] } +// CHECK-TLS-DEBUG-DAG: [[S2:%.+]] = type { [[INT]], double } +// CHECK-TLS-DEBUG-DAG: [[S3:%.+]] = type { [[INT]], float } +// CHECK-TLS-DEBUG-DAG: [[S4:%.+]] = type { [[INT]], [[INT]] } +// CHECK-TLS-DEBUG-DAG: [[S5:%.+]] = type { [[INT]], [[INT]], [[INT]] } +// CHECK-TLS-DEBUG-DAG: [[SMAIN:%.+]] = type { [[INT]], double, double } struct S1 { int a; @@ -120,9 +141,19 @@ // CHECK-DAG: [[ST_S4_ST:@.+]] = linkonce_odr global %struct.S4 zeroinitializer // CHECK-DAG: [[ST_S4_ST]].cache. = common global i8** null // CHECK-NOT: .cache. = common global i8** null +// CHECK-TLS-DAG: [[GS1:@.+]] = internal thread_local global [[S1]] zeroinitializer +// CHECK-TLS-DAG: [[DEFAULT_LOC:@.+]] = private unnamed_addr constant [[IDENT]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]+}} x i8], [{{[0-9]+}} x i8]* {{@.+}}, i32 0, i32 0) } +// CHECK-TLS-DAG: [[GS2:@.+]] = internal global [[S2]] zeroinitializer +// CHECK-TLS-DAG: [[ARR_X:@.+]] = thread_local global [2 x [3 x [[S1]]]] zeroinitializer +// CHECK-TLS-DAG: [[SM:@.+]] = internal thread_local global [[SMAIN]] zeroinitializer +// CHECK-TLS-DAG: [[STATIC_S:@.+]] = external thread_local global [[S3]] +// CHECK-TLS-DAG: [[GS3:@.+]] = external thread_local global [[S5]] +// CHECK-TLS-DAG: [[ST_INT_ST:@.+]] = linkonce_odr thread_local global i32 23 +// CHECK-TLS-DAG: [[ST_FLOAT_ST:@.+]] = linkonce_odr thread_local global float 2.300000e+01 +// CHECK-TLS-DAG: [[ST_S4_ST:@.+]] = linkonce_odr thread_local global %struct.S4 zeroinitializer // There is no cache for gs2 - it is not threadprivate. Check that there is only // 8 caches created (for Static::s, gs1, gs3, arr_x, main::sm, ST::st, -// ST::st, ST::st) +// ST::st, ST::st) or 8 TLS global variables. // CHECK-DEBUG-DAG: [[GS1:@.+]] = internal global [[S1]] zeroinitializer // CHECK-DEBUG-DAG: [[GS2:@.+]] = internal global [[S2]] zeroinitializer // CHECK-DEBUG-DAG: [[ARR_X:@.+]] = global [2 x [3 x [[S1]]]] zeroinitializer @@ -132,26 +163,39 @@ // CHECK-DEBUG-DAG: [[ST_INT_ST:@.+]] = linkonce_odr global i32 23 // CHECK-DEBUG-DAG: [[ST_FLOAT_ST:@.+]] = linkonce_odr global float 2.300000e+01 // CHECK-DEBUG-DAG: [[ST_S4_ST:@.+]] = linkonce_odr global %struct.S4 zeroinitializer -// CHECK-DEBUG-DAG: [[LOC1:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;162;9;;\00" -// CHECK-DEBUG-DAG: [[LOC2:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;217;9;;\00" -// CHECK-DEBUG-DAG: [[LOC3:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;304;19;;\00" -// CHECK-DEBUG-DAG: [[LOC4:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;329;9;;\00" -// CHECK-DEBUG-DAG: [[LOC5:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;342;9;;\00" -// CHECK-DEBUG-DAG: [[LOC6:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;359;10;;\00" -// CHECK-DEBUG-DAG: [[LOC7:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;376;10;;\00" -// CHECK-DEBUG-DAG: [[LOC8:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;402;10;;\00" -// CHECK-DEBUG-DAG: [[LOC9:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;423;10;;\00" -// CHECK-DEBUG-DAG: [[LOC10:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;438;10;;\00" -// CHECK-DEBUG-DAG: [[LOC11:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;455;27;;\00" -// CHECK-DEBUG-DAG: [[LOC12:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;472;10;;\00" -// CHECK-DEBUG-DAG: [[LOC13:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;551;9;;\00" -// CHECK-DEBUG-DAG: [[LOC14:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;568;10;;\00" -// CHECK-DEBUG-DAG: [[LOC15:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;594;10;;\00" -// CHECK-DEBUG-DAG: [[LOC16:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;615;10;;\00" -// CHECK-DEBUG-DAG: [[LOC17:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;630;10;;\00" -// CHECK-DEBUG-DAG: [[LOC18:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;647;27;;\00" -// CHECK-DEBUG-DAG: [[LOC19:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;664;10;;\00" -// CHECK-DEBUG-DAG: [[LOC20:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;276;9;;\00" +// CHECK-DEBUG-DAG: [[LOC1:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;206;9;;\00" +// CHECK-DEBUG-DAG: [[LOC2:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;296;9;;\00" +// CHECK-DEBUG-DAG: [[LOC3:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;420;19;;\00" +// CHECK-DEBUG-DAG: [[LOC4:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;457;9;;\00" +// CHECK-DEBUG-DAG: [[LOC5:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;474;9;;\00" +// CHECK-DEBUG-DAG: [[LOC6:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;499;10;;\00" +// CHECK-DEBUG-DAG: [[LOC7:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;524;10;;\00" +// CHECK-DEBUG-DAG: [[LOC8:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;566;10;;\00" +// CHECK-DEBUG-DAG: [[LOC9:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;595;10;;\00" +// CHECK-DEBUG-DAG: [[LOC10:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;618;10;;\00" +// CHECK-DEBUG-DAG: [[LOC11:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;645;27;;\00" +// CHECK-DEBUG-DAG: [[LOC12:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;670;10;;\00" +// CHECK-DEBUG-DAG: [[LOC13:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;778;9;;\00" +// CHECK-DEBUG-DAG: [[LOC14:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;803;10;;\00" +// CHECK-DEBUG-DAG: [[LOC15:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;845;10;;\00" +// CHECK-DEBUG-DAG: [[LOC16:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;874;10;;\00" +// CHECK-DEBUG-DAG: [[LOC17:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;897;10;;\00" +// CHECK-DEBUG-DAG: [[LOC18:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;924;27;;\00" +// CHECK-DEBUG-DAG: [[LOC19:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;foobar;949;10;;\00" +// CHECK-DEBUG-DAG: [[LOC20:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;389;9;;\00" +// CHECK-TLS-DEBUG-DAG: [[GS1:@.+]] = internal thread_local global [[S1]] zeroinitializer +// CHECK-TLS-DEBUG-DAG: [[GS2:@.+]] = internal global [[S2]] zeroinitializer +// CHECK-TLS-DEBUG-DAG: [[ARR_X:@.+]] = thread_local global [2 x [3 x [[S1]]]] zeroinitializer +// CHECK-TLS-DEBUG-DAG: [[SM:@.+]] = internal thread_local global [[SMAIN]] zeroinitializer +// CHECK-TLS-DEBUG-DAG: [[STATIC_S:@.+]] = external thread_local global [[S3]] +// CHECK-TLS-DEBUG-DAG: [[GS3:@.+]] = external thread_local global [[S5]] +// CHECK-TLS-DEBUG-DAG: [[ST_INT_ST:@.+]] = linkonce_odr thread_local global i32 23 +// CHECK-TLS-DEBUG-DAG: [[ST_FLOAT_ST:@.+]] = linkonce_odr thread_local global float 2.300000e+01 +// CHECK-TLS-DEBUG-DAG: [[ST_S4_ST:@.+]] = linkonce_odr thread_local global %struct.S4 zeroinitializer +// CHECK-TLS-DEBUG-DAG: [[LOC1:@1]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;206;9;;\00" +// CHECK-TLS-DEBUG-DAG: [[LOC2:@2]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;296;9;;\00" +// CHECK-TLS-DEBUG-DAG: [[LOC3:@3]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;457;9;;\00" +// CHECK-TLS-DEBUG-DAG: [[LOC4:@4]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;389;9;;\00" struct Static { static S3 s; @@ -168,7 +212,6 @@ // CHECK: [[ARG:%.+]] = load i8*, i8** [[ARG_ADDR]] // CHECK: [[RES:%.*]] = bitcast i8* [[ARG]] to [[S1]]* // CHECK-NEXT: call {{.*}} [[S1_CTOR]]([[S1]]* [[RES]], {{.*}} 5) -// CHECK: [[ARG:%.+]] = load i8*, i8** [[ARG_ADDR]] // CHECK: ret i8* [[ARG]] // CHECK-NEXT: } // CHECK: define internal {{.*}}void [[GS1_DTOR:@\.__kmpc_global_dtor_\..*]](i8*) @@ -182,6 +225,20 @@ // CHECK: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i8* (i8*)* [[GS1_CTOR]], i8* (i8*, i8*)* null, void (i8*)* [[GS1_DTOR]]) // CHECK-NEXT: ret void // CHECK-NEXT: } +// CHECK-TLS: define {{.*}} [[S1_CTOR:@.*]]([[S1]]* {{.*}}, +// CHECK-TLS: define {{.*}} [[S1_DTOR:@.*]]([[S1]]* {{.*}}) +// CHECK-TLS: define internal {{.*}}i8* [[GS1_CTOR:@\.__kmpc_global_ctor_\..*]](i8*) +// CHECK-TLS: call {{.*}} [[S1_CTOR]]([[S1]]* [[GS1]], {{.*}} 5) +// CHECK-TLS: ret i8* bitcast ([[S1]]* [[GS1]] to i8*) +// CHECK-TLS-NEXT: } +// CHECK-TLS: define internal {{.*}}void [[GS1_DTOR:@\.__kmpc_global_dtor_\..*]](i8*) +// CHECK-TLS: call {{.*}} [[S1_DTOR]]([[S1]]* [[GS1]]) +// CHECK-TLS-NEXT: ret void +// CHECK-TLS-NEXT: } +// CHECK-TLS: define internal {{.*}}void [[GS1_INIT:@\.__omp_threadprivate_init_\..*]]() +// CHECK-TLS: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i8* (i8*)* [[GS1_CTOR]], i8* (i8*, i8*)* null, void (i8*)* [[GS1_DTOR]]) +// CHECK-TLS-NEXT: ret void +// CHECK-TLS-NEXT: } // CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC1]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] @@ -192,7 +249,6 @@ // CHECK-DEBUG: [[ARG:%.+]] = load i8*, i8** [[ARG_ADDR]] // CHECK-DEBUG: [[RES:%.*]] = bitcast i8* [[ARG]] to [[S1]]* // CHECK-DEBUG-NEXT: call {{.*}} [[S1_CTOR:@.+]]([[S1]]* [[RES]], {{.*}} 5) -// CHECK-DEBUG: [[ARG:%.+]] = load i8*, i8** [[ARG_ADDR]] // CHECK-DEBUG: ret i8* [[ARG]] // CHECK-DEBUG-NEXT: } // CHECK-DEBUG: define {{.*}} [[S1_CTOR]]([[S1]]* {{.*}}, @@ -204,15 +260,38 @@ // CHECK-DEBUG-NEXT: ret void // CHECK-DEBUG-NEXT: } // CHECK-DEBUG: define {{.*}} [[S1_DTOR]]([[S1]]* {{.*}}) +// CHECK-TLS-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] +// CHECK-TLS-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 +// CHECK-TLS-DEBUG: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC1]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// CHECK-TLS-DEBUG: @__kmpc_global_thread_num +// CHECK-TLS-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i8* (i8*)* [[GS1_CTOR:@\.__kmpc_global_ctor_\..*]], i8* (i8*, i8*)* null, void (i8*)* [[GS1_DTOR:@\.__kmpc_global_dtor_\..*]]) +// CHECK-TLS-DEBUG: define internal {{.*}}i8* [[GS1_CTOR]](i8*) +// CHECK-TLS-DEBUG: call {{.*}} [[S1_CTOR:@.+]]([[S1]]* [[GS1]], {{.*}} 5) +// CHECK-TLS-DEBUG: ret i8* bitcast ([[S1]]* [[GS1]] to i8*) +// CHECK-TLS-DEBUG-NEXT: } +// CHECK-TLS-DEBUG: define {{.*}} [[S1_CTOR]]([[S1]]* {{.*}}, +// CHECK-TLS-DEBUG: define internal {{.*}}void [[GS1_DTOR]](i8*) +// CHECK-TLS-DEBUG: call {{.*}} [[S1_DTOR:@.+]]([[S1]]* [[GS1]]) +// CHECK-TLS-DEBUG-NEXT: ret void +// CHECK-TLS-DEBUG-NEXT: } +// CHECK-TLS-DEBUG: define {{.*}} [[S1_DTOR]]([[S1]]* {{.*}}) static S2 gs2(27); // CHECK: define {{.*}} [[S2_CTOR:@.*]]([[S2]]* {{.*}}, // CHECK: define {{.*}} [[S2_DTOR:@.*]]([[S2]]* {{.*}}) // No another call for S2 constructor because it is not threadprivate // CHECK-NOT: call {{.*}} [[S2_CTOR]]([[S2]]* +// CHECK-TLS: define {{.*}} [[S2_CTOR:@.*]]([[S2]]* {{.*}}, +// CHECK-TLS: define {{.*}} [[S2_DTOR:@.*]]([[S2]]* {{.*}}) +// No another call for S2 constructor because it is not threadprivate +// CHECK-TLS-NOT: call {{.*}} [[S2_CTOR]]([[S2]]* // CHECK-DEBUG: define {{.*}} [[S2_CTOR:@.*]]([[S2]]* {{.*}}, // CHECK-DEBUG: define {{.*}} [[S2_DTOR:@.*]]([[S2]]* {{.*}}) // No another call for S2 constructor because it is not threadprivate // CHECK-DEBUG-NOT: call {{.*}} [[S2_CTOR]]([[S2]]* +// CHECK-TLS-DEBUG: define {{.*}} [[S2_CTOR:@.*]]([[S2]]* {{.*}}, +// CHECK-TLS-DEBUG: define {{.*}} [[S2_DTOR:@.*]]([[S2]]* {{.*}}) +// No another call for S2 constructor because it is not threadprivate +// CHECK-TLS-DEBUG-NOT: call {{.*}} [[S2_CTOR]]([[S2]]* S1 arr_x[2][3] = { { 1, 2, 3 }, { 4, 5, 6 } }; #pragma omp threadprivate(arr_x) // CHECK: define internal {{.*}}i8* [[ARR_X_CTOR:@\.__kmpc_global_ctor_\..*]](i8*) @@ -233,7 +312,6 @@ // CHECK: invoke {{.*}} [[S1_CTOR]]([[S1]]* [[ARR_ELEMENT]], [[INT]] {{.*}}5) // CHECK: [[ARR_ELEMENT2:%.*]] = getelementptr inbounds [[S1]], [[S1]]* [[ARR_ELEMENT]], i{{.*}} 1 // CHECK: invoke {{.*}} [[S1_CTOR]]([[S1]]* [[ARR_ELEMENT2]], [[INT]] {{.*}}6) -// CHECK: [[ARG:%.+]] = load i8*, i8** [[ARG_ADDR]] // CHECK: ret i8* [[ARG]] // CHECK: } // CHECK: define internal {{.*}}void [[ARR_X_DTOR:@\.__kmpc_global_dtor_\..*]](i8*) @@ -255,6 +333,30 @@ // CHECK: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i8* (i8*)* [[ARR_X_CTOR]], i8* (i8*, i8*)* null, void (i8*)* [[ARR_X_DTOR]]) // CHECK-NEXT: ret void // CHECK-NEXT: } +// CHECK-TLS: define internal {{.*}}i8* [[ARR_X_CTOR:@\.__kmpc_global_ctor_\..*]](i8*) +// CHECK-TLS: invoke {{.*}} [[S1_CTOR]]([[S1]]* getelementptr inbounds ([2 x [3 x [[S1]]]], [2 x [3 x [[S1]]]]* [[ARR_X]], i{{.*}} 0, i{{.*}} 0, i{{.*}} 0), i{{.*}} 1) +// CHECK-TLS: invoke {{.*}} [[S1_CTOR]]([[S1]]* getelementptr inbounds ([2 x [3 x [[S1]]]], [2 x [3 x [[S1]]]]* [[ARR_X]], i{{.*}} 0, i{{.*}} 0, i{{.*}} 1), i{{.*}} 2) +// CHECK-TLS: invoke {{.*}} [[S1_CTOR]]([[S1]]* getelementptr inbounds ([2 x [3 x [[S1]]]], [2 x [3 x [[S1]]]]* [[ARR_X]], i{{.*}} 0, i{{.*}} 0, i{{.*}} 2), i{{.*}} 3) +// CHECK-TLS: invoke {{.*}} [[S1_CTOR]]([[S1]]* getelementptr inbounds ([2 x [3 x [[S1]]]], [2 x [3 x [[S1]]]]* [[ARR_X]], i{{.*}} 0, i{{.*}} 1, i{{.*}} 0), i{{.*}} 4) +// CHECK-TLS: invoke {{.*}} [[S1_CTOR]]([[S1]]* getelementptr inbounds ([2 x [3 x [[S1]]]], [2 x [3 x [[S1]]]]* [[ARR_X]], i{{.*}} 0, i{{.*}} 1, i{{.*}} 1), i{{.*}} 5) +// CHECK-TLS: invoke {{.*}} [[S1_CTOR]]([[S1]]* getelementptr inbounds ([2 x [3 x [[S1]]]], [2 x [3 x [[S1]]]]* [[ARR_X]], i{{.*}} 0, i{{.*}} 1, i{{.*}} 2), i{{.*}} 6) +// CHECK-TLS: ret i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*) +// CHECK-TLS: } +// CHECK-TLS: define internal {{.*}}void [[ARR_X_DTOR:@\.__kmpc_global_dtor_\..*]](i8*) +// CHECK-TLS: br label %[[ARR_LOOP:.*]] +// CHECK-TLS: {{.*}}[[ARR_LOOP]]{{.*}} +// CHECK-TLS-NEXT: [[ARR_ELEMENTPAST:%.*]] = phi [[S1]]* [ getelementptr inbounds ([[S1]], [[S1]]* getelementptr inbounds ([2 x [3 x [[S1]]]], [2 x [3 x [[S1]]]]* [[ARR_X]], i{{.*}} 0, i{{.*}} 0, i{{.*}} 0), i{{.*}} 6), {{.*}} ], [ [[ARR_ELEMENT:%.*]], {{.*}} ] +// CHECK-TLS-NEXT: [[ARR_ELEMENT]] = getelementptr inbounds [[S1]], [[S1]]* [[ARR_ELEMENTPAST]], i{{.*}} -1 +// CHECK-TLS-NEXT: invoke {{.*}} [[S1_DTOR]]([[S1]]* [[ARR_ELEMENT]]) +// CHECK-TLS: [[ARR_DONE:%.*]] = icmp eq [[S1]]* [[ARR_ELEMENT]], getelementptr inbounds ([2 x [3 x [[S1]]]], [2 x [3 x [[S1]]]]* [[ARR_X]], i{{.*}} 0, i{{.*}} 0, i{{.*}} 0) +// CHECK-TLS-NEXT: br i1 [[ARR_DONE]], label %[[ARR_EXIT:.*]], label %[[ARR_LOOP]] +// CHECK-TLS: {{.*}}[[ARR_EXIT]]{{.*}} +// CHECK-TLS-NEXT: ret void +// CHECK-TLS: } +// CHECK-TLS: define internal {{.*}}void [[ARR_X_INIT:@\.__omp_threadprivate_init_\..*]]() +// CHECK-TLS: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i8* (i8*)* [[ARR_X_CTOR]], i8* (i8*, i8*)* null, void (i8*)* [[ARR_X_DTOR]]) +// CHECK-TLS-NEXT: ret void +// CHECK-TLS-NEXT: } // CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC2]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] @@ -264,11 +366,22 @@ // CHECK-DEBUG: } // CHECK-DEBUG: define internal {{.*}}void [[ARR_X_DTOR]](i8*) // CHECK-DEBUG: } +// CHECK-TLS-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] +// CHECK-TLS-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 +// CHECK-TLS-DEBUG: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC2]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// CHECK-TLS-DEBUG: @__kmpc_global_thread_num +// CHECK-TLS-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i8* (i8*)* [[ARR_X_CTOR:@\.__kmpc_global_ctor_\..*]], i8* (i8*, i8*)* null, void (i8*)* [[ARR_X_DTOR:@\.__kmpc_global_dtor_\..*]]) +// CHECK-TLS-DEBUG: define internal {{.*}}i8* [[ARR_X_CTOR]](i8*) +// CHECK-TLS-DEBUG: } +// CHECK-TLS-DEBUG: define internal {{.*}}void [[ARR_X_DTOR]](i8*) +// CHECK-TLS-DEBUG: } extern S5 gs3; #pragma omp threadprivate(gs3) // No call for S5 constructor because gs3 has just declaration, not a definition. // CHECK-NOT: call {{.*}}([[S5]]* +// CHECK-TLS-NOT: call {{.*}}([[S5]]* // CHECK-DEBUG-NOT: call {{.*}}([[S5]]* +// CHECK-TLS-DEBUG-NOT: call {{.*}}([[S5]]* template struct ST { @@ -280,9 +393,12 @@ T ST::st(23); // CHECK-LABEL: @main() +// CHECK-TLS-LABEL: @main() // CHECK-DEBUG-LABEL: @main() +// CHECK-TLS-DEBUG-LABEL: @main() int main() { // CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] + // CHECK-TLS-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] int Res; struct Smain { int a; @@ -312,6 +428,12 @@ // CHECK-NEXT: [[GS1_A:%.*]] = load [[INT]], [[INT]]* [[GS1_A_ADDR]] // CHECK-NEXT: invoke {{.*}} [[SMAIN_CTOR:.*]]([[SMAIN]]* [[SM]], [[INT]] {{.*}}[[GS1_A]]) // CHECK: call {{.*}}void @__cxa_guard_release +// CHECK-TLS: call {{.*}}i{{.*}} @__cxa_guard_acquire +// CHECK-TLS: call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[DEFAULT_LOC]]) +// CHECK-TLS: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i8* (i8*)* [[SM_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[SM_DTOR:@\.__kmpc_global_dtor_\..+]]) +// CHECK-TLS: [[GS1_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S1]], [[S1]]* [[GS1]], i{{.*}} 0, i{{.*}} 0) +// CHECK-TLS-NEXT: invoke {{.*}} [[SMAIN_CTOR:.*]]([[SMAIN]]* [[SM]], [[INT]] {{.*}} [[GS1_A]]) +// CHECK-TLS: call {{.*}}void @__cxa_guard_release // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC3]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] // CHECK-DEBUG-NEXT: [[THREAD_NUM:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[KMPC_LOC_ADDR]]) @@ -326,12 +448,20 @@ // CHECK-DEBUG-NEXT: [[GS1_A:%.*]] = load [[INT]], [[INT]]* [[GS1_A_ADDR]] // CHECK-DEBUG-NEXT: invoke {{.*}} [[SMAIN_CTOR:.*]]([[SMAIN]]* [[SM]], [[INT]] {{.*}}[[GS1_A]]) // CHECK-DEBUG: call {{.*}}void @__cxa_guard_release +// CHECK-TLS-DEBUG: call {{.*}}i{{.*}} @__cxa_guard_acquire +// CHECK-TLS-DEBUG: call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[KMPC_LOC_ADDR]]) +// CHECK-TLS-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i8* (i8*)* [[SM_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[SM_DTOR:@\.__kmpc_global_dtor_\..+]]) +// CHECK-TLS-DEBUG: [[GS1_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S1]], [[S1]]* [[GS1]], i{{.*}} 0, i{{.*}} 0) +// CHECK-TLS-DEBUG-NEXT: invoke {{.*}} [[SMAIN_CTOR:.*]]([[SMAIN]]* [[SM]], [[INT]] {{.*}}[[GS1_A]]) +// CHECK-TLS-DEBUG: call {{.*}}void @__cxa_guard_release #pragma omp threadprivate(sm) // CHECK: [[STATIC_S_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S3]]* [[STATIC_S]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[STATIC_S]].cache.) // CHECK-NEXT: [[STATIC_S_ADDR:%.*]] = bitcast i8* [[STATIC_S_TEMP_ADDR]] to [[S3]]* // CHECK-NEXT: [[STATIC_S_A_ADDR:%.*]] = getelementptr inbounds [[S3]], [[S3]]* [[STATIC_S_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-NEXT: [[STATIC_S_A:%.*]] = load [[INT]], [[INT]]* [[STATIC_S_A_ADDR]] // CHECK-NEXT: store [[INT]] [[STATIC_S_A]], [[INT]]* [[RES_ADDR:[^,]+]] + // CHECK-TLS: [[STATIC_S_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S3]], [[S3]]* [[STATIC_S]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-NEXT: store [[INT]] [[STATIC_S_A]], [[INT]]* [[RES_ADDR:[^,]+]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC5]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] // CHECK-DEBUG-NEXT: [[STATIC_S_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S3]]* [[STATIC_S]] to i8*), i{{.*}} {{[0-9]+}}, i8*** @@ -339,6 +469,8 @@ // CHECK-DEBUG-NEXT: [[STATIC_S_A_ADDR:%.*]] = getelementptr inbounds [[S3]], [[S3]]* [[STATIC_S_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-DEBUG-NEXT: [[STATIC_S_A:%.*]] = load [[INT]], [[INT]]* [[STATIC_S_A_ADDR]] // CHECK-DEBUG-NEXT: store [[INT]] [[STATIC_S_A]], [[INT]]* [[RES_ADDR:[^,]+]] + // CHECK-TLS-DEBUG: [[STATIC_S_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S3]], [[S3]]* [[STATIC_S]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-DEBUG-NEXT: store [[INT]] [[STATIC_S_A]], [[INT]]* [[RES_ADDR:[^,]+]] Res = Static::s.a; // CHECK: [[SM_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[SM]].cache.) // CHECK-NEXT: [[SM_ADDR:%.*]] = bitcast i8* [[SM_TEMP_ADDR]] to [[SMAIN]]* @@ -347,6 +479,10 @@ // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[SM_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS: [[SM_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[SMAIN]], [[SMAIN]]* [[SM]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[SM_A]] + // CHECK-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC6]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] // CHECK-DEBUG-NEXT: [[SM_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i{{.*}} {{[0-9]+}}, i8*** @@ -356,6 +492,10 @@ // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[SM_A]] // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS-DEBUG: [[SM_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[SMAIN]], [[SMAIN]]* [[SM]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[SM_A]] + // CHECK-TLS-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += sm.a; // CHECK: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS1]].cache.) // CHECK-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* @@ -364,6 +504,10 @@ // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS1_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS: [[GS1_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S1]], [[S1]]* [[GS1]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS1_A]] + // CHECK-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC7]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] // CHECK-DEBUG-NEXT: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** @@ -373,15 +517,27 @@ // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS1_A]] // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS-DEBUG: [[GS1_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S1]], [[S1]]* [[GS1]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS1_A]] + // CHECK-TLS-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += gs1.a; // CHECK: [[GS2_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S2]], [[S2]]* [[GS2]], i{{.*}} 0, i{{.*}} 0) // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS2_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS: [[GS2_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S2]], [[S2]]* [[GS2]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS2_A]] + // CHECK-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] // CHECK-DEBUG: [[GS2_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S2]], [[S2]]* [[GS2]], i{{.*}} 0, i{{.*}} 0) // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS2_A]] // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS-DEBUG: [[GS2_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S2]], [[S2]]* [[GS2]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS2_A]] + // CHECK-TLS-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += gs2.a; // CHECK: [[GS3_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS3]].cache.) // CHECK-NEXT: [[GS3_ADDR:%.*]] = bitcast i8* [[GS3_TEMP_ADDR]] to [[S5]]* @@ -390,6 +546,10 @@ // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS3_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS: [[GS3_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S5]], [[S5]]* [[GS3]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS3_A]] + // CHECK-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC8]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] // CHECK-DEBUG-NEXT: [[GS3_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8*** @@ -399,6 +559,10 @@ // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS3_A]] // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS-DEBUG: [[GS3_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S5]], [[S5]]* [[GS3]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS3_A]] + // CHECK-TLS-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += gs3.a; // CHECK: [[ARR_X_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ARR_X]].cache.) // CHECK-NEXT: [[ARR_X_ADDR:%.*]] = bitcast i8* [[ARR_X_TEMP_ADDR]] to [2 x [3 x [[S1]]]]* @@ -409,6 +573,10 @@ // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS: [[ARR_X_1_1_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([2 x [3 x [[S1]]]], [2 x [3 x [[S1]]]]* [[ARR_X]], i{{.*}} 0, i{{.*}} 1, i{{.*}} 1, i{{.*}} 0) + // CHECK-TLS-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]] + // CHECK-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC9]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] // CHECK-DEBUG-NEXT: [[ARR_X_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8*** @@ -420,6 +588,10 @@ // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]] // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS-DEBUG: [[ARR_X_1_1_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([2 x [3 x [[S1]]]], [2 x [3 x [[S1]]]]* [[ARR_X]], i{{.*}} 0, i{{.*}} 1, i{{.*}} 1, i{{.*}} 0) + // CHECK-TLS-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]] + // CHECK-TLS-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += arr_x[1][1].a; // CHECK: [[ST_INT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_INT_ST]].cache.) // CHECK-NEXT: [[ST_INT_ST_ADDR:%.*]] = bitcast i8* [[ST_INT_ST_TEMP_ADDR]] to [[INT]]* @@ -427,6 +599,10 @@ // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS: [[ST_INT_ST_VAL:%.*]] = load [[INT]], [[INT]]* [[ST_INT_ST]] + // CHECK-TLS-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]] + // CHECK-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC10]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] // CHECK-DEBUG-NEXT: [[ST_INT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** @@ -435,6 +611,10 @@ // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]] // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS-DEBUG: [[ST_INT_ST_VAL:%.*]] = load [[INT]], [[INT]]* [[ST_INT_ST]] + // CHECK-TLS-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]] + // CHECK-TLS-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += ST::st; // CHECK: [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_FLOAT_ST]].cache.) // CHECK-NEXT: [[ST_FLOAT_ST_ADDR:%.*]] = bitcast i8* [[ST_FLOAT_ST_TEMP_ADDR]] to float* @@ -443,6 +623,11 @@ // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS: [[ST_FLOAT_ST_VAL:%.*]] = load float, float* [[ST_FLOAT_ST]] + // CHECK-TLS-NEXT: [[FLOAT_TO_INT_CONV:%.*]] = fptosi float [[ST_FLOAT_ST_VAL]] to [[INT]] + // CHECK-TLS-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]] + // CHECK-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC11]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** @@ -452,6 +637,11 @@ // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]] // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS-DEBUG: [[ST_FLOAT_ST_VAL:%.*]] = load float, float* [[ST_FLOAT_ST]] + // CHECK-TLS-DEBUG-NEXT: [[FLOAT_TO_INT_CONV:%.*]] = fptosi float [[ST_FLOAT_ST_VAL]] to [[INT]] + // CHECK-TLS-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]] + // CHECK-TLS-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += static_cast(ST::st); // CHECK: [[ST_S4_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_S4_ST]].cache.) // CHECK-NEXT: [[ST_S4_ST_ADDR:%.*]] = bitcast i8* [[ST_S4_ST_TEMP_ADDR]] to [[S4]]* @@ -460,6 +650,10 @@ // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_S4_ST_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS: [[ST_S4_ST_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S4]], [[S4]]* [[ST_S4_ST]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_S4_ST_A]] + // CHECK-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC12]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] // CHECK-DEBUG-NEXT: [[ST_S4_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** @@ -469,11 +663,19 @@ // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_S4_ST_A]] // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS-DEBUG: [[ST_S4_ST_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S4]], [[S4]]* [[ST_S4_ST]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_S4_ST_A]] + // CHECK-TLS-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += ST::st.a; // CHECK: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: ret [[INT]] [[RES]] + // CHECK-TLS: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-NEXT: ret [[INT]] [[RES]] // CHECK-DEBUG: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-DEBUG-NEXT: ret [[INT]] [[RES]] + // CHECK-TLS-DEBUG: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-DEBUG-NEXT: ret [[INT]] [[RES]] return Res; } // CHECK: } @@ -488,7 +690,6 @@ // CHECK-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]], [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-NEXT: [[GS1_A:%.*]] = load [[INT]], [[INT]]* [[GS1_A_ADDR]] // CHECK-NEXT: call {{.*}} [[SMAIN_CTOR:@.+]]([[SMAIN]]* [[RES]], [[INT]] {{.*}}[[GS1_A]]) -// CHECK: [[ARG:%.+]] = load i8*, i8** [[ARG_ADDR]] // CHECK-NEXT: ret i8* [[ARG]] // CHECK-NEXT: } // CHECK: define {{.*}} [[SMAIN_CTOR]]([[SMAIN]]* {{.*}}, @@ -500,6 +701,17 @@ // CHECK-NEXT: ret void // CHECK-NEXT: } // CHECK: define {{.*}} [[SMAIN_DTOR]]([[SMAIN]]* {{.*}}) +// CHECK-TLS: define internal {{.*}}i8* [[SM_CTOR]](i8*) +// CHECK-TLS: [[GS1_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S1]], [[S1]]* [[GS1]], i{{.*}} 0, i{{.*}} 0) +// CHECK-TLS-NEXT: call {{.*}} [[SMAIN_CTOR:@.+]]([[SMAIN]]* [[SM]], [[INT]] {{.*}}[[GS1_A]]) +// CHECK-TLS-NEXT: ret i8* bitcast ([[SMAIN]]* [[SM]] to i8*) +// CHECK-TLS-NEXT: } +// CHECK-TLS: define {{.*}} [[SMAIN_CTOR]]([[SMAIN]]* {{.*}}, +// CHECK-TLS: define internal {{.*}}void [[SM_DTOR]](i8*) +// CHECK-TLS: call {{.*}} [[SMAIN_DTOR:@.+]]([[SMAIN]]* [[SM]]) +// CHECK-TLS-NEXT: ret void +// CHECK-TLS-NEXT: } +// CHECK-TLS: define {{.*}} [[SMAIN_DTOR]]([[SMAIN]]* {{.*}}) // CHECK-DEBUG: define internal {{.*}}i8* [[SM_CTOR]](i8*) // CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 @@ -515,7 +727,6 @@ // CHECK-DEBUG-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]], [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-DEBUG-NEXT: [[GS1_A:%.*]] = load [[INT]], [[INT]]* [[GS1_A_ADDR]] // CHECK-DEBUG-NEXT: call {{.*}} [[SMAIN_CTOR:@.+]]([[SMAIN]]* [[RES]], [[INT]] {{.*}}[[GS1_A]]) -// CHECK-DEBUG: [[ARG:%.+]] = load i8*, i8** [[ARG_ADDR]] // CHECK-DEBUG-NEXT: ret i8* [[ARG]] // CHECK-DEBUG-NEXT: } // CHECK-DEBUG: define {{.*}} [[SMAIN_CTOR]]([[SMAIN]]* {{.*}}, @@ -523,12 +734,24 @@ // CHECK-DEBUG: call {{.*}} [[SMAIN_DTOR:@.+]]([[SMAIN]]* // CHECK-DEBUG: } // CHECK-DEBUG: define {{.*}} [[SMAIN_DTOR]]([[SMAIN]]* {{.*}}) +// CHECK-TLS-DEBUG: define internal {{.*}}i8* [[SM_CTOR]](i8*) +// CHECK-TLS-DEBUG: [[GS1_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S1]], [[S1]]* [[GS1]], i{{.*}} 0, i{{.*}} 0) +// CHECK-TLS-DEBUG-NEXT: call {{.*}} [[SMAIN_CTOR:@.+]]([[SMAIN]]* [[SM]], [[INT]] {{.*}}[[GS1_A]]) +// CHECK-TLS-DEBUG-NEXT: ret i8* bitcast ([[SMAIN]]* [[SM]] to i8*) +// CHECK-TLS-DEBUG-NEXT: } +// CHECK-TLS-DEBUG: define {{.*}} [[SMAIN_CTOR]]([[SMAIN]]* {{.*}}, +// CHECK-TLS-DEBUG: define internal {{.*}} [[SM_DTOR:@.+]](i8*) +// CHECK-TLS-DEBUG: call {{.*}} [[SMAIN_DTOR:@.+]]([[SMAIN]]* +// CHECK-TLS-DEBUG: } +// CHECK-TLS-DEBUG: define {{.*}} [[SMAIN_DTOR]]([[SMAIN]]* {{.*}}) #endif #ifdef BODY // CHECK-LABEL: @{{.*}}foobar{{.*}}() +// CHECK-TLS-LABEL: @{{.*}}foobar{{.*}}() // CHECK-DEBUG-LABEL: @{{.*}}foobar{{.*}}() +// CHECK-TLS-DEBUG-LABEL: @{{.*}}foobar{{.*}}() int foobar() { // CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] int Res; @@ -538,6 +761,8 @@ // CHECK-NEXT: [[STATIC_S_A_ADDR:%.*]] = getelementptr inbounds [[S3]], [[S3]]* [[STATIC_S_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-NEXT: [[STATIC_S_A:%.*]] = load [[INT]], [[INT]]* [[STATIC_S_A_ADDR]] // CHECK-NEXT: store [[INT]] [[STATIC_S_A]], [[INT]]* [[RES_ADDR:[^,]+]] + // CHECK-TLS: [[STATIC_S_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S3]], [[S3]]* [[STATIC_S]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-NEXT: store [[INT]] [[STATIC_S_A]], [[INT]]* [[RES_ADDR:[^,]+]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC13]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] // CHECK-DEBUG-NEXT: [[THREAD_NUM:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[KMPC_LOC_ADDR]]) @@ -548,6 +773,8 @@ // CHECK-DEBUG-NEXT: [[STATIC_S_A_ADDR:%.*]] = getelementptr inbounds [[S3]], [[S3]]* [[STATIC_S_ADDR]], i{{.*}} 0, i{{.*}} 0 // CHECK-DEBUG-NEXT: [[STATIC_S_A:%.*]] = load [[INT]], [[INT]]* [[STATIC_S_A_ADDR]] // CHECK-DEBUG-NEXT: store [[INT]] [[STATIC_S_A]], [[INT]]* [[RES_ADDR:[^,]+]] + // CHECK-TLS-DEBUG: [[STATIC_S_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S3]], [[S3]]* [[STATIC_S]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-DEBUG-NEXT: store [[INT]] [[STATIC_S_A]], [[INT]]* [[RES_ADDR:[^,]+]] Res = Static::s.a; // CHECK: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS1]].cache.) // CHECK-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* @@ -556,6 +783,10 @@ // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS1_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS-NEXT: [[GS1_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S1]], [[S1]]* [[GS1]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS1_A]] + // CHECK-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC14]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] // CHECK-DEBUG-NEXT: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** @@ -565,15 +796,27 @@ // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS1_A]] // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS-DEBUG: [[GS1_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S1]], [[S1]]* [[GS1]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS1_A]] + // CHECK-TLS-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += gs1.a; // CHECK: [[GS2_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S2]], [[S2]]* [[GS2]], i{{.*}} 0, i{{.*}} 0) // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS2_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS: [[GS2_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S2]], [[S2]]* [[GS2]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS2_A]] + // CHECK-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] // CHECK-DEBUG: [[GS2_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S2]], [[S2]]* [[GS2]], i{{.*}} 0, i{{.*}} 0) // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS2_A]] // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS-DEBUG: [[GS2_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S2]], [[S2]]* [[GS2]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS2_A]] + // CHECK-TLS-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += gs2.a; // CHECK: [[GS3_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS3]].cache.) // CHECK-NEXT: [[GS3_ADDR:%.*]] = bitcast i8* [[GS3_TEMP_ADDR]] to [[S5]]* @@ -582,6 +825,10 @@ // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS3_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS: [[GS3_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S5]], [[S5]]* [[GS3]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS3_A]] + // CHECK-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC15]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] // CHECK-DEBUG-NEXT: [[GS3_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8*** @@ -591,6 +838,10 @@ // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS3_A]] // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS-DEBUG: [[GS3_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S5]], [[S5]]* [[GS3]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS3_A]] + // CHECK-TLS-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += gs3.a; // CHECK: [[ARR_X_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ARR_X]].cache.) // CHECK-NEXT: [[ARR_X_ADDR:%.*]] = bitcast i8* [[ARR_X_TEMP_ADDR]] to [2 x [3 x [[S1]]]]* @@ -601,6 +852,10 @@ // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS: [[ARR_X_1_1_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([2 x [3 x [[S1]]]], [2 x [3 x [[S1]]]]* [[ARR_X]], i{{.*}} 0, i{{.*}} 1, i{{.*}} 1, i{{.*}} 0) + // CHECK-TLS-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]] + // CHECK-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC16]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] // CHECK-DEBUG-NEXT: [[ARR_X_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8*** @@ -612,6 +867,10 @@ // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]] // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS-DEBUG: [[ARR_X_1_1_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([2 x [3 x [[S1]]]], [2 x [3 x [[S1]]]]* [[ARR_X]], i{{.*}} 0, i{{.*}} 1, i{{.*}} 1, i{{.*}} 0) + // CHECK-TLS-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]] + // CHECK-TLS-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += arr_x[1][1].a; // CHECK: [[ST_INT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_INT_ST]].cache.) // CHECK-NEXT: [[ST_INT_ST_ADDR:%.*]] = bitcast i8* [[ST_INT_ST_TEMP_ADDR]] to [[INT]]* @@ -619,6 +878,10 @@ // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS: [[ST_INT_ST_VAL:%.*]] = load [[INT]], [[INT]]* [[ST_INT_ST]] + // CHECK-TLS-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]] + // CHECK-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC17]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] // CHECK-DEBUG-NEXT: [[ST_INT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** @@ -627,6 +890,10 @@ // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]] // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS-DEBUG: [[ST_INT_ST_VAL:%.*]] = load [[INT]], [[INT]]* [[ST_INT_ST]] + // CHECK-TLS-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]] + // CHECK-TLS-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += ST::st; // CHECK: [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_FLOAT_ST]].cache.) // CHECK-NEXT: [[ST_FLOAT_ST_ADDR:%.*]] = bitcast i8* [[ST_FLOAT_ST_TEMP_ADDR]] to float* @@ -635,6 +902,11 @@ // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS: [[ST_FLOAT_ST_VAL:%.*]] = load float, float* [[ST_FLOAT_ST]] + // CHECK-TLS-NEXT: [[FLOAT_TO_INT_CONV:%.*]] = fptosi float [[ST_FLOAT_ST_VAL]] to [[INT]] + // CHECK-TLS-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]] + // CHECK-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC18]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** @@ -644,6 +916,11 @@ // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]] // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS-DEBUG-NEXT: [[ST_FLOAT_ST_VAL:%.*]] = load float, float* [[ST_FLOAT_ST]] + // CHECK-TLS-DEBUG-NEXT: [[FLOAT_TO_INT_CONV:%.*]] = fptosi float [[ST_FLOAT_ST_VAL]] to [[INT]] + // CHECK-TLS-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]] + // CHECK-TLS-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += static_cast(ST::st); // CHECK: [[ST_S4_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_S4_ST]].cache.) // CHECK-NEXT: [[ST_S4_ST_ADDR:%.*]] = bitcast i8* [[ST_S4_ST_TEMP_ADDR]] to [[S4]]* @@ -652,6 +929,10 @@ // CHECK-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_S4_ST_A]] // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS: [[ST_S4_ST_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S4]], [[S4]]* [[ST_S4_ST]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_S4_ST_A]] + // CHECK-TLS-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC19]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] // CHECK-DEBUG-NEXT: [[ST_S4_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** @@ -661,11 +942,19 @@ // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_S4_ST_A]] // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-TLS-DEBUG: [[ST_S4_ST_A:%.*]] = load [[INT]], [[INT]]* getelementptr inbounds ([[S4]], [[S4]]* [[ST_S4_ST]], i{{.*}} 0, i{{.*}} 0) + // CHECK-TLS-DEBUG-NEXT: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_S4_ST_A]] + // CHECK-TLS-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] Res += ST::st.a; // CHECK: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-NEXT: ret [[INT]] [[RES]] + // CHECK-TLS: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-NEXT: ret [[INT]] [[RES]] // CHECK-DEBUG: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] // CHECK-DEBUG-NEXT: ret [[INT]] [[RES]] + // CHECK-TLS-DEBUG: [[RES:%.*]] = load [[INT]], [[INT]]* [[RES_ADDR]] + // CHECK-TLS-DEBUG-NEXT: ret [[INT]] [[RES]] return Res; } #endif @@ -676,7 +965,6 @@ // CHECK: [[ARG:%.+]] = load i8*, i8** [[ARG_ADDR]] // CHECK: [[RES:%.*]] = bitcast i8* [[ARG]] to [[S4]]* // CHECK-NEXT: call {{.*}} [[S4_CTOR:@.+]]([[S4]]* [[RES]], {{.*}} 23) -// CHECK: [[ARG:%.+]] = load i8*, i8** [[ARG_ADDR]] // CHECK-NEXT: ret i8* [[ARG]] // CHECK-NEXT: } // CHECK: define {{.*}} [[S4_CTOR]]([[S4]]* {{.*}}, @@ -688,6 +976,17 @@ // CHECK-NEXT: ret void // CHECK-NEXT: } // CHECK: define {{.*}} [[S4_DTOR]]([[S4]]* {{.*}}) +// CHECK-TLS: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i8* (i8*)* [[ST_S4_ST_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[ST_S4_ST_DTOR:@\.__kmpc_global_dtor_\..+]]) +// CHECK-TLS: define internal {{.*}}i8* [[ST_S4_ST_CTOR]](i8*) +// CHECK-TLS: call {{.*}} [[S4_CTOR:@.+]]([[S4]]* [[ST_S4_ST]], {{.*}} 23) +// CHECK-TLS-NEXT: ret i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*) +// CHECK-TLS-NEXT: } +// CHECK-TLS: define {{.*}} [[S4_CTOR]]([[S4]]* {{.*}}, +// CHECK-TLS: define internal {{.*}}void [[ST_S4_ST_DTOR]](i8*) +// CHECK-TLS: call {{.*}} [[S4_DTOR:@.+]]([[S4]]* [[ST_S4_ST]]) +// CHECK-TLS-NEXT: ret void +// CHECK-TLS-NEXT: } +// CHECK-TLS: define {{.*}} [[S4_DTOR]]([[S4]]* {{.*}}) // CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC20]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] @@ -699,10 +998,27 @@ // CHECK-DEBUG: define internal {{.*}}void [[ST_S4_ST_DTOR]](i8*) // CHECK-DEBUG: } // CHECK-DEBUG: define {{.*}} [[S4_DTOR:@.*]]([[S4]]* {{.*}}) +// CHECK-TLS-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] +// CHECK-TLS-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]], [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 +// CHECK-TLS-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8], [{{.*}} x i8]* [[LOC4]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// CHECK-TLS-DEBUG: @__kmpc_global_thread_num +// CHECK-TLS-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i8* (i8*)* [[ST_S4_ST_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[ST_S4_ST_DTOR:@\.__kmpc_global_dtor_\..+]]) +// CHECK-TLS-DEBUG: define internal {{.*}}i8* [[ST_S4_ST_CTOR]](i8*) +// CHECK-TLS-DEBUG: } +// CHECK-TLS-DEBUG: define {{.*}} [[S4_CTOR:@.*]]([[S4]]* {{.*}}, +// CHECK-TLS-DEBUG: define internal {{.*}}void [[ST_S4_ST_DTOR]](i8*) +// CHECK-TLS-DEBUG: } +// CHECK-TLS-DEBUG: define {{.*}} [[S4_DTOR:@.*]]([[S4]]* {{.*}}) // CHECK: define internal {{.*}}void {{@.*}}() // CHECK-DAG: call {{.*}}void [[GS1_INIT]]() // CHECK-DAG: call {{.*}}void [[ARR_X_INIT]]() // CHECK: ret void +// CHECK-TLS: define internal {{.*}}void {{@.*}}() +// CHECK-TLS-DAG: call {{.*}}void [[GS1_INIT]]() +// CHECK-TLS-DAG: call {{.*}}void [[ARR_X_INIT]]() +// CHECK-TLS: ret void // CHECK-DEBUG: define internal {{.*}}void {{@.*}}() // CHECK-DEBUG: ret void +// CHECK-TLS-DEBUG: define internal {{.*}}void {{@.*}}() +// CHECK-TLS-DEBUG: ret void