Index: include/clang/AST/DeclOpenMP.h =================================================================== --- include/clang/AST/DeclOpenMP.h +++ include/clang/AST/DeclOpenMP.h @@ -19,6 +19,7 @@ #include "llvm/ADT/ArrayRef.h" namespace clang { +class Expr; /// \brief This represents '#pragma omp threadprivate ...' directive. /// For example, in the following, both 'a' and 'A::b' are threadprivate: Index: include/clang/Basic/Attr.td =================================================================== --- include/clang/Basic/Attr.td +++ include/clang/Basic/Attr.td @@ -1911,3 +1911,10 @@ let Documentation = [LoopHintDocs, UnrollHintDocs]; } + +def OMPThreadPrivateDecl : InheritableAttr { + // This attribute has no spellings as it is only ever created implicitly. + let Spellings = []; + let SemaHandler = 0; + let Documentation = [Undocumented]; +} Index: lib/AST/ASTContext.cpp =================================================================== --- lib/AST/ASTContext.cpp +++ lib/AST/ASTContext.cpp @@ -7904,7 +7904,9 @@ // We never need to emit an uninstantiated function template. if (FD->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate) return false; - } else + } else if (isa(D)) + return true; + else return false; // If this is a member of a class template, we do not need to emit it. 
Index: lib/CodeGen/CGDeclCXX.cpp =================================================================== --- lib/CodeGen/CGDeclCXX.cpp +++ lib/CodeGen/CGDeclCXX.cpp @@ -14,6 +14,7 @@ #include "CodeGenFunction.h" #include "CGCXXABI.h" #include "CGObjCRuntime.h" +#include "CGOpenMPRuntime.h" #include "clang/Frontend/CodeGenOptions.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/Intrinsics.h" @@ -139,6 +140,10 @@ QualType T = D.getType(); if (!T->isReferenceType()) { + if (getLangOpts().OpenMP && D.hasAttr()) + (void)CGM.getOpenMPRuntime().EmitOMPThreadPrivateVarDefinition( + &D, DeclPtr, D.getAttr()->getLocation(), + this); if (PerformInit) EmitDeclInit(*this, D, DeclPtr); if (CGM.isTypeConstant(D.getType(), true)) Index: lib/CodeGen/CGExpr.cpp =================================================================== --- lib/CodeGen/CGExpr.cpp +++ lib/CodeGen/CGExpr.cpp @@ -16,6 +16,7 @@ #include "CGCall.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" +#include "CGOpenMPRuntime.h" #include "CGRecordLayout.h" #include "CodeGenModule.h" #include "TargetInfo.h" @@ -1777,6 +1778,14 @@ return CGF.Builder.CreateBitCast(V, IRType->getPointerTo(AS), Name); } +static LValue EmitThreadPrivateVarDeclLValue( + CodeGenFunction &CGF, const VarDecl *VD, QualType T, llvm::Value *V, + llvm::Type *RealVarTy, CharUnits Alignment, SourceLocation Loc) { + V = CGF.CGM.getOpenMPRuntime().getOMPAddrOfThreadPrivate(CGF, VD, V, Loc); + V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy); + return CGF.MakeAddrLValue(V, T, Alignment); +} + static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, const Expr *E, const VarDecl *VD) { QualType T = E->getType(); @@ -1791,6 +1800,11 @@ V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy); CharUnits Alignment = CGF.getContext().getDeclAlign(VD); LValue LV; + // Emit reference to the private copy of the variable if it is an OpenMP + // threadprivate variable. 
+ if (CGF.getLangOpts().OpenMP && VD->hasAttr()) + return EmitThreadPrivateVarDeclLValue(CGF, VD, T, V, RealVarTy, Alignment, + E->getExprLoc()); if (VD->getType()->isReferenceType()) { llvm::LoadInst *LI = CGF.Builder.CreateLoad(V); LI->setAlignment(Alignment.getQuantity()); @@ -1904,6 +1918,12 @@ V = CGM.getOrCreateStaticVarDecl( *VD, CGM.getLLVMLinkageVarDefinition(VD, /*isConstant=*/false)); + // Check if variable is threadprivate. + if (V && getLangOpts().OpenMP && VD->hasAttr()) + return EmitThreadPrivateVarDeclLValue( + *this, VD, T, V, getTypes().ConvertTypeForMem(VD->getType()), + Alignment, E->getExprLoc()); + // Use special handling for lambdas. if (!V) { if (FieldDecl *FD = LambdaCaptureFields.lookup(VD)) { Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -16,6 +16,7 @@ #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" namespace llvm { @@ -23,12 +24,14 @@ class Constant; class Function; class FunctionType; +class GlobalVariable; class StructType; class Type; class Value; } // namespace llvm namespace clang { +class VarDecl; class OMPExecutableDirective; class VarDecl; @@ -62,9 +65,15 @@ OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 }; enum OpenMPRTLFunction { - // Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro - // microtask, ...); + /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, + /// kmpc_micro microtask, ...); OMPRTL__kmpc_fork_call, + /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, + /// kmp_int32 global_tid, void *data, size_t size, void ***cache); + OMPRTL__kmpc_threadprivate_cached, + /// \brief Call to void __kmpc_threadprivate_register( ident_t *, + /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); + OMPRTL__kmpc_threadprivate_register, // Call to __kmpc_int32 
kmpc_global_thread_num(ident_t *loc); OMPRTL__kmpc_global_thread_num, // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, @@ -155,8 +164,9 @@ /// \brief Type kmp_critical_name, originally defined as typedef kmp_int32 /// kmp_critical_name[8]; llvm::ArrayType *KmpCriticalNameTy; - /// \brief Map of critical regions names and the corresponding lock objects. - llvm::StringMap CriticalRegionVarNames; + /// \brief A map from unique names to auto-generated internal variables. + llvm::StringMap + OMPInternalVarNames; /// \brief Emits object of ident_t type with info for source location. /// \param Flags Flags for OpenMP location. @@ -176,6 +186,13 @@ /// \return Specified function. llvm::Constant *CreateRuntimeFunction(OpenMPRTLFunction Function); + /// \brief If the specified mangled name is not in the module, create and + /// return threadprivate cache object. This object is a pointer's worth of + /// storage that's reserved for use by the OpenMP runtime. + /// \param VD Threadprivate variable. + /// \return Cache variable for the specified threadprivate. + llvm::Constant *getOrCreateThreadPrivateCache(const VarDecl *VD); + /// \brief Emits address of the word in a memory where current thread id is /// stored. virtual llvm::Value *EmitThreadIDAddress(CodeGenFunction &CGF, @@ -185,6 +202,28 @@ /// llvm::Value *GetOpenMPThreadID(CodeGenFunction &CGF, SourceLocation Loc); + /// \brief Gets (if variable with the given name already exists) or creates + /// internal global variable with the specified Name. The created variable has + /// linkage CommonLinkage by default and is initialized by null value. + /// \param Ty Type of the global variable. If it already exists, the type + /// must be the same. + /// \param Name Name of the variable. + llvm::GlobalVariable *GetOrCreateInternalVariable(llvm::Type *Ty, + const llvm::Twine &Name); + + /// \brief Set of threadprivate variables with the generated initializer.
+ llvm::DenseSet ThreadPrivateWithDefinition; + + /// \brief Emits initialization code for the threadprivate variables. + /// \param VDAddr Address of the global variable \a VD. + /// \param Ctor Pointer to a global init function for \a VD. + /// \param CopyCtor Pointer to a global copy function for \a VD. + /// \param Dtor Pointer to a global destructor function for \a VD. + /// \param Loc Location of threadprivate declaration. + void EmitOMPThreadPrivateVarInit(CodeGenFunction &CGF, llvm::Value *VDAddr, + llvm::Value *Ctor, llvm::Value *CopyCtor, + llvm::Value *Dtor, SourceLocation Loc); + public: explicit CGOpenMPRuntime(CodeGenModule &CGM); virtual ~CGOpenMPRuntime() {} @@ -261,6 +300,29 @@ virtual void EmitOMPNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc); + + /// \brief Returns address of the threadprivate variable for the current + /// thread. + /// \param VD Threadprivate variable. + /// \param VDAddr Address of the global variable \a VD. + /// \param Loc Location of the reference to threadprivate var. + /// \return Address of the threadprivate variable for the current thread. + virtual llvm::Value *getOMPAddrOfThreadPrivate(CodeGenFunction &CGF, + const VarDecl *VD, + llvm::Value *VDAddr, + SourceLocation Loc); + + /// \brief Emit code for initialization of a threadprivate variable. It emits + /// a call to runtime library which adds initial value to the newly created + /// threadprivate variable (if it is not constant) and registers destructor + /// for the variable (if any). + /// \param VD Threadprivate variable. + /// \param VDAddr Address of the global variable \a VD. + /// \param Loc Location of threadprivate declaration.
+ virtual llvm::Function * + EmitOMPThreadPrivateVarDefinition(const VarDecl *VD, llvm::Value *VDAddr, + SourceLocation Loc, + CodeGenFunction *CGF = nullptr); }; } // namespace CodeGen } // namespace clang Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -271,6 +271,17 @@ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); break; } + case OMPRTL__kmpc_threadprivate_cached: { + // Build void *__kmpc_threadprivate_cached(ident_t *loc, + // kmp_int32 global_tid, void *data, size_t size, void ***cache); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + CGM.VoidPtrTy, CGM.SizeTy, + CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); + break; + } case OMPRTL__kmpc_critical: { // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, // kmp_critical_name *crit); @@ -282,6 +293,29 @@ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical"); break; } + case OMPRTL__kmpc_threadprivate_register: { + // Build void __kmpc_threadprivate_register(ident_t *, void *data, + // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); + // typedef void *(*kmpc_ctor)(void *); + auto KmpcCtorTy = + llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, + /*isVarArg*/ false)->getPointerTo(); + // typedef void *(*kmpc_cctor)(void *, void *); + llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; + auto KmpcCopyCtorTy = + llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs, + /*isVarArg*/ false)->getPointerTo(); + // typedef void (*kmpc_dtor)(void *); + auto KmpcDtorTy = + llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) + ->getPointerTo(); + llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, 
KmpcCtorTy, + KmpcCopyCtorTy, KmpcDtorTy}; + auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, + /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); + break; + } case OMPRTL__kmpc_end_critical: { // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, // kmp_critical_name *crit); @@ -333,6 +367,159 @@ return RTLFn; } +llvm::Constant * +CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { + // Lookup the entry, lazily creating it if necessary. + return GetOrCreateInternalVariable( + CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD), ".cache.")); +} + +llvm::Value *CGOpenMPRuntime::getOMPAddrOfThreadPrivate(CodeGenFunction &CGF, + const VarDecl *VD, + llvm::Value *VDAddr, + SourceLocation Loc) { + auto VarTy = VDAddr->getType()->getPointerElementType(); + llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc), + GetOpenMPThreadID(CGF, Loc), + CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy), + CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), + getOrCreateThreadPrivateCache(VD)}; + return CGF.EmitRuntimeCall( + CreateRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args); +} + +void CGOpenMPRuntime::EmitOMPThreadPrivateVarInit( + CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor, + llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { + // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime + // library. + auto OMPLoc = EmitOpenMPUpdateLocation(CGF, Loc); + CGF.EmitRuntimeCall(CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), + OMPLoc); + // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) + // to register constructor/destructor for variable. 
+ llvm::Value *Args[] = {OMPLoc, + CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy), + Ctor, CopyCtor, Dtor}; + CGF.EmitRuntimeCall(CreateRuntimeFunction( + CGOpenMPRuntime::OMPRTL__kmpc_threadprivate_register), + Args); +} + +llvm::Function *CGOpenMPRuntime::EmitOMPThreadPrivateVarDefinition( + const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc, + CodeGenFunction *CGF) { + VD = VD->getDefinition(CGM.getContext()); + if (VD && ThreadPrivateWithDefinition.count(VD) == 0) { + ThreadPrivateWithDefinition.insert(VD); + QualType ASTTy = VD->getType(); + + llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; + auto Init = VD->getAnyInitializer(); + if (CGM.getLangOpts().CPlusPlus && Init && + !Init->isConstantInitializer(CGM.getContext(), + /*ForRef=*/false)) { + // Generate function that re-emits the declaration's initializer into the + // threadprivate copy of the variable VD + CodeGenFunction CtorCGF(CGM); + FunctionArgList Args; + ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, CGM.getContext().VoidPtrTy); + Args.push_back(&Dst); + + auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( + CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(), + /*isVariadic=*/false); + auto FTy = CGM.getTypes().GetFunctionType(FI); + auto Fn = CGM.CreateGlobalInitOrDestructFunction( + FTy, ".__kmpc_global_ctor_.", Loc); + CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, + Args, SourceLocation()); + auto ArgVal = CtorCGF.EmitLoadOfScalar( + CtorCGF.GetAddrOfLocalVar(&Dst), + /*Volatile=*/false, CGM.PointerAlignInBytes, + CGM.getContext().VoidPtrTy, Dst.getLocation()); + auto Arg = CtorCGF.Builder.CreatePointerCast( + ArgVal, + CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy))); + CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), + /*IsInitializer=*/true); + ArgVal = CtorCGF.EmitLoadOfScalar( + CtorCGF.GetAddrOfLocalVar(&Dst), + /*Volatile=*/false, 
CGM.PointerAlignInBytes, + CGM.getContext().VoidPtrTy, Dst.getLocation()); + CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); + CtorCGF.FinishFunction(); + Ctor = Fn; + } + if (VD->getType().isDestructedType() != QualType::DK_none) { + // Generate function that emits destructor call for the threadprivate copy + // of the variable VD + CodeGenFunction DtorCGF(CGM); + FunctionArgList Args; + ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, SourceLocation(), + /*Id=*/nullptr, CGM.getContext().VoidPtrTy); + Args.push_back(&Dst); + + auto &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( + CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(), + /*isVariadic=*/false); + auto FTy = CGM.getTypes().GetFunctionType(FI); + auto Fn = CGM.CreateGlobalInitOrDestructFunction( + FTy, ".__kmpc_global_dtor_.", Loc); + DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, + SourceLocation()); + auto ArgVal = DtorCGF.EmitLoadOfScalar( + DtorCGF.GetAddrOfLocalVar(&Dst), + /*Volatile=*/false, CGM.PointerAlignInBytes, + CGM.getContext().VoidPtrTy, Dst.getLocation()); + DtorCGF.emitDestroy(ArgVal, ASTTy, + DtorCGF.getDestroyer(ASTTy.isDestructedType()), + DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); + DtorCGF.FinishFunction(); + Dtor = Fn; + } + // Do not emit init function if it is not required. + if (!Ctor && !Dtor) + return nullptr; + + llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; + auto CopyCtorTy = + llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, + /*isVarArg=*/false)->getPointerTo(); + // Copying constructor for the threadprivate variable. + // Must be NULL - reserved by runtime, but currently it requires that this + // parameter is always NULL. Otherwise it fires assertion. 
+ CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); + if (Ctor == nullptr) { + auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, + /*isVarArg=*/false)->getPointerTo(); + Ctor = llvm::Constant::getNullValue(CtorTy); + } + if (Dtor == nullptr) { + auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, + /*isVarArg=*/false)->getPointerTo(); + Dtor = llvm::Constant::getNullValue(DtorTy); + } + if (!CGF) { + auto InitFunctionTy = + llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); + auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( + InitFunctionTy, ".__omp_threadprivate_init_."); + CodeGenFunction InitCGF(CGM); + FunctionArgList ArgList; + InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, + CGM.getTypes().arrangeNullaryFunction(), ArgList, + Loc); + EmitOMPThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); + InitCGF.FinishFunction(); + return InitFunction; + } + EmitOMPThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); + } + return nullptr; +} + void CGOpenMPRuntime::EmitOMPParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, @@ -398,21 +585,31 @@ return ThreadIDTemp; } -llvm::Value *CGOpenMPRuntime::GetCriticalRegionLock(StringRef CriticalName) { +llvm::GlobalVariable * +CGOpenMPRuntime::GetOrCreateInternalVariable(llvm::Type *Ty, + const llvm::Twine &Name) { SmallString<256> Buffer; llvm::raw_svector_ostream Out(Buffer); - Out << ".gomp_critical_user_" << CriticalName << ".var"; - auto RuntimeCriticalName = Out.str(); - auto &Elem = CriticalRegionVarNames.GetOrCreateValue(RuntimeCriticalName); - if (Elem.getValue() != nullptr) + Out << Name; + auto RuntimeName = Out.str(); + auto &Elem = OMPInternalVarNames.GetOrCreateValue(RuntimeName); + if (Elem.getValue()) { + assert(Elem.getValue()->getType()->getPointerElementType() == Ty && + "OMP internal variable has different type than requested"); return Elem.getValue(); + } - auto Lock = new 
llvm::GlobalVariable( - CGM.getModule(), KmpCriticalNameTy, /*IsConstant*/ false, + auto Item = new llvm::GlobalVariable( + CGM.getModule(), Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage, - llvm::Constant::getNullValue(KmpCriticalNameTy), Elem.getKey()); - Elem.setValue(Lock); - return Lock; + llvm::Constant::getNullValue(Ty), Elem.getKey()); + Elem.setValue(Item); + return Item; +} + +llvm::Value *CGOpenMPRuntime::GetCriticalRegionLock(StringRef CriticalName) { + llvm::Twine Name(".gomp_critical_user_", CriticalName); + return GetOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var")); } void CGOpenMPRuntime::EmitOMPCriticalRegionStart(CodeGenFunction &CGF, Index: lib/CodeGen/CodeGenModule.h =================================================================== --- lib/CodeGen/CodeGenModule.h +++ lib/CodeGen/CodeGenModule.h @@ -1087,6 +1087,11 @@ void setAliasAttributes(const Decl *D, llvm::GlobalValue *GV); void addReplacement(StringRef Name, llvm::Constant *C); + + /// \brief Emit code for the threadprivate directive. + /// \param D Threadprivate declaration.
+ void EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D); + private: llvm::Constant * Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -3218,6 +3218,10 @@ break; } + case Decl::OMPThreadPrivate: + EmitOMPThreadPrivateDecl(cast(D)); + break; + case Decl::ClassTemplateSpecialization: { const auto *Spec = cast(D); if (DebugInfo && @@ -3494,3 +3498,13 @@ return getCXXABI().getAddrOfRTTIDescriptor(Ty); } +void CodeGenModule::EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D) { + for (auto RefExpr : D->varlists()) { + auto *VD = cast(cast(RefExpr)->getDecl()); + if (auto InitFunction = + getOpenMPRuntime().EmitOMPThreadPrivateVarDefinition( + VD, GetAddrOfGlobalVar(VD), RefExpr->getLocStart())) + CXXGlobalInits.push_back(InitFunction); + } +} + Index: lib/Parse/Parser.cpp =================================================================== --- lib/Parse/Parser.cpp +++ lib/Parse/Parser.cpp @@ -624,8 +624,7 @@ HandlePragmaOpenCLExtension(); return DeclGroupPtrTy(); case tok::annot_pragma_openmp: - ParseOpenMPDeclarativeDirective(); - return DeclGroupPtrTy(); + return ParseOpenMPDeclarativeDirective(); case tok::annot_pragma_ms_pointers_to_members: HandlePragmaMSPointersToMembers(); return DeclGroupPtrTy(); Index: lib/Sema/SemaOpenMP.cpp =================================================================== --- lib/Sema/SemaOpenMP.cpp +++ lib/Sema/SemaOpenMP.cpp @@ -848,6 +848,8 @@ Vars.push_back(RefExpr); DSAStack->addDSA(VD, DE, OMPC_threadprivate); + VD->addAttr(OMPThreadPrivateDeclAttr::CreateImplicit( + Context, SourceRange(Loc, Loc))); } OMPThreadPrivateDecl *D = nullptr; if (!Vars.empty()) { Index: lib/Serialization/ASTReaderDecl.cpp =================================================================== --- lib/Serialization/ASTReaderDecl.cpp +++ lib/Serialization/ASTReaderDecl.cpp @@ -2274,7 +2274,8 @@ if (isa(D) || isa(D) || isa(D) || - 
isa(D)) + isa(D) || + isa(D)) return true; if (VarDecl *Var = dyn_cast(D)) return Var->isFileVarDecl() && Index: test/OpenMP/threadprivate_codegen.cpp =================================================================== --- test/OpenMP/threadprivate_codegen.cpp +++ test/OpenMP/threadprivate_codegen.cpp @@ -0,0 +1,556 @@ +// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -g -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK-DEBUG %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER +// CHECK-DAG: [[IDENT:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[S1:%.+]] = type { [[INT:i[0-9]+]] } +// CHECK-DAG: [[S2:%.+]] = type { [[INT]], double } +// CHECK-DAG: [[S3:%.+]] = type { [[INT]], float } +// CHECK-DAG: [[S4:%.+]] = type { [[INT]], [[INT]] } +// CHECK-DAG: [[S5:%.+]] = type { [[INT]], [[INT]], [[INT]] } +// CHECK-DAG: [[SMAIN:%.+]] = type { [[INT]], double, double } +// CHECK-DEBUG-DAG: [[IDENT:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DEBUG-DAG: [[S1:%.+]] = type { [[INT:i[0-9]+]] } +// CHECK-DEBUG-DAG: [[S2:%.+]] = type { [[INT]], double } +// CHECK-DEBUG-DAG: [[S3:%.+]] = type { [[INT]], float } +// CHECK-DEBUG-DAG: [[S4:%.+]] = type { [[INT]], [[INT]] } +// CHECK-DEBUG-DAG: [[S5:%.+]] = type { [[INT]], [[INT]], [[INT]] } +// CHECK-DEBUG-DAG: [[SMAIN:%.+]] = type { [[INT]], double, double } + +struct S1 { + int a; + S1() + : a(0) { + } + S1(int a) + : a(a) { + } + S1(const S1 &s) { + a = 12 + s.a; + } + ~S1() { + a = 0; + } +}; + +struct S2 { + int a; + double b; + S2() + : a(0) { + } + S2(int a) + : a(a) { + } + S2(const S2 &s) { + a = 12 + s.a; + } + ~S2() { + a = 
0; + } +}; + +struct S3 { + int a; + float b; + S3() + : a(0) { + } + S3(int a) + : a(a) { + } + S3(const S3 &s) { + a = 12 + s.a; + } + ~S3() { + a = 0; + } +}; + +struct S4 { + int a, b; + S4() + : a(0) { + } + S4(int a) + : a(a) { + } + S4(const S4 &s) { + a = 12 + s.a; + } + ~S4() { + a = 0; + } +}; + +struct S5 { + int a, b, c; + S5() + : a(0) { + } + S5(int a) + : a(a) { + } + S5(const S5 &s) { + a = 12 + s.a; + } + ~S5() { + a = 0; + } +}; + +// CHECK-DAG: [[GS1:@.+]] = internal global [[S1]] zeroinitializer +// CHECK-DAG: [[GS1]].cache. = common global i8** null +// CHECK-DAG: [[DEFAULT_LOC:@.+]] = private unnamed_addr constant [[IDENT]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]+}} x i8]* {{@.+}}, i32 0, i32 0) } +// CHECK-DAG: [[GS2:@.+]] = internal global [[S2]] zeroinitializer +// CHECK-DAG: [[ARR_X:@.+]] = global [2 x [3 x [[S1]]]] zeroinitializer +// CHECK-DAG: [[ARR_X]].cache. = common global i8** null +// CHECK-DAG: [[SM:@.+]] = internal global [[SMAIN]] zeroinitializer +// CHECK-DAG: [[SM]].cache. = common global i8** null +// CHECK-DAG: [[STATIC_S:@.+]] = external global [[S3]] +// CHECK-DAG: [[STATIC_S]].cache. = common global i8** null +// CHECK-DAG: [[GS3:@.+]] = external global [[S5]] +// CHECK-DAG: [[GS3]].cache. = common global i8** null +// CHECK-DAG: [[ST_INT_ST:@.+]] = linkonce_odr global i32 23 +// CHECK-DAG: [[ST_INT_ST]].cache. = common global i8** null +// CHECK-DAG: [[ST_FLOAT_ST:@.+]] = linkonce_odr global float 2.300000e+01 +// CHECK-DAG: [[ST_FLOAT_ST]].cache. = common global i8** null +// CHECK-DAG: [[ST_S4_ST:@.+]] = linkonce_odr global %struct.S4 zeroinitializer +// CHECK-DAG: [[ST_S4_ST]].cache. = common global i8** null +// CHECK-NOT: .cache. = common global i8** null +// There is no cache for gs2 - it is not threadprivate. 
Check that there is only +// 8 caches created (for Static::s, gs1, gs3, arr_x, main::sm, ST<int>::st, +// ST<float>::st, ST<S4>::st) +// CHECK-DEBUG-DAG: [[GS1:@.+]] = internal global [[S1]] zeroinitializer +// CHECK-DEBUG-DAG: [[GS2:@.+]] = internal global [[S2]] zeroinitializer +// CHECK-DEBUG-DAG: [[ARR_X:@.+]] = global [2 x [3 x [[S1]]]] zeroinitializer +// CHECK-DEBUG-DAG: [[SM:@.+]] = internal global [[SMAIN]] zeroinitializer +// CHECK-DEBUG-DAG: [[STATIC_S:@.+]] = external global [[S3]] +// CHECK-DEBUG-DAG: [[GS3:@.+]] = external global [[S5]] +// CHECK-DEBUG-DAG: [[ST_INT_ST:@.+]] = linkonce_odr global i32 23 +// CHECK-DEBUG-DAG: [[ST_FLOAT_ST:@.+]] = linkonce_odr global float 2.300000e+01 +// CHECK-DEBUG-DAG: [[ST_S4_ST:@.+]] = linkonce_odr global %struct.S4 zeroinitializer +// CHECK-DEBUG-DAG: [[LOC1:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;155;9;;\00" +// CHECK-DEBUG-DAG: [[LOC2:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;209;9;;\00" +// CHECK-DEBUG-DAG: [[LOC3:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;296;19;;\00" +// CHECK-DEBUG-DAG: [[LOC4:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;321;9;;\00" +// CHECK-DEBUG-DAG: [[LOC5:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;334;9;;\00" +// CHECK-DEBUG-DAG: [[LOC6:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;351;10;;\00" +// CHECK-DEBUG-DAG: [[LOC7:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;368;10;;\00" +// CHECK-DEBUG-DAG: [[LOC8:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;394;10;;\00" +// CHECK-DEBUG-DAG: [[LOC9:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;415;10;;\00" +//
CHECK-DEBUG-DAG: [[LOC10:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;430;10;;\00" +// CHECK-DEBUG-DAG: [[LOC11:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;447;27;;\00" +// CHECK-DEBUG-DAG: [[LOC12:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;464;10;;\00" +// CHECK-DEBUG-DAG: [[LOC13:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;268;9;;\00" + +struct Static { + static S3 s; +#pragma omp threadprivate(s) +}; + +static S1 gs1(5); +#pragma omp threadprivate(gs1) +// CHECK: define {{.*}} [[S1_CTOR:@.*]]([[S1]]* {{.*}}, +// CHECK: define {{.*}} [[S1_DTOR:@.*]]([[S1]]* {{.*}}) +// CHECK: define internal {{.*}}i8* [[GS1_CTOR:@\.__kmpc_global_ctor_\..*]](i8*) +// CHECK: store i8* %0, i8** [[ARG_ADDR:%.*]], +// CHECK: [[ARG:%.+]] = load i8** [[ARG_ADDR]] +// CHECK: [[RES:%.*]] = bitcast i8* [[ARG]] to [[S1]]* +// CHECK-NEXT: call {{.*}} [[S1_CTOR]]([[S1]]* [[RES]], {{.*}} 5) +// CHECK: [[ARG:%.+]] = load i8** [[ARG_ADDR]] +// CHECK: ret i8* [[ARG]] +// CHECK-NEXT: } +// CHECK: define internal {{.*}}void [[GS1_DTOR:@\.__kmpc_global_dtor_\..*]](i8*) +// CHECK: store i8* %0, i8** [[ARG_ADDR:%.*]], +// CHECK: [[ARG:%.+]] = load i8** [[ARG_ADDR]] +// CHECK: [[RES:%.*]] = bitcast i8* [[ARG]] to [[S1]]* +// CHECK-NEXT: call {{.*}} [[S1_DTOR]]([[S1]]* [[RES]]) +// CHECK-NEXT: ret void +// CHECK-NEXT: } +// CHECK: define internal {{.*}}void [[GS1_INIT:@\.__omp_threadprivate_init_\..*]]() +// CHECK: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i8* (i8*)* [[GS1_CTOR]], i8* (i8*, i8*)* null, void (i8*)* [[GS1_DTOR]]) +// CHECK-NEXT: ret void +// CHECK-NEXT: } +// CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] +// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], 
i{{.*}} 0, i{{.*}} 4 +// CHECK-DEBUG: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC1]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// CHECK-DEBUG: @__kmpc_global_thread_num +// CHECK-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i8* (i8*)* [[GS1_CTOR:@\.__kmpc_global_ctor_\..*]], i8* (i8*, i8*)* null, void (i8*)* [[GS1_DTOR:@\.__kmpc_global_dtor_\..*]]) +// CHECK-DEBUG: define internal {{.*}}i8* [[GS1_CTOR]](i8*) +// CHECK-DEBUG: store i8* %0, i8** [[ARG_ADDR:%.*]], +// CHECK-DEBUG: [[ARG:%.+]] = load i8** [[ARG_ADDR]] +// CHECK-DEBUG: [[RES:%.*]] = bitcast i8* [[ARG]] to [[S1]]* +// CHECK-DEBUG-NEXT: call {{.*}} [[S1_CTOR:@.+]]([[S1]]* [[RES]], {{.*}} 5) +// CHECK-DEBUG: [[ARG:%.+]] = load i8** [[ARG_ADDR]] +// CHECK-DEBUG: ret i8* [[ARG]] +// CHECK-DEBUG-NEXT: } +// CHECK-DEBUG: define {{.*}} [[S1_CTOR]]([[S1]]* {{.*}}, +// CHECK-DEBUG: define internal {{.*}}void [[GS1_DTOR]](i8*) +// CHECK-DEBUG: store i8* %0, i8** [[ARG_ADDR:%.*]], +// CHECK-DEBUG: [[ARG:%.+]] = load i8** [[ARG_ADDR]] +// CHECK-DEBUG: [[RES:%.*]] = bitcast i8* [[ARG]] to [[S1]]* +// CHECK-DEBUG-NEXT: call {{.*}} [[S1_DTOR:@.+]]([[S1]]* [[RES]]) +// CHECK-DEBUG-NEXT: ret void +// CHECK-DEBUG-NEXT: } +// CHECK-DEBUG: define {{.*}} [[S1_DTOR]]([[S1]]* {{.*}}) +static S2 gs2(27); +// CHECK: define {{.*}} [[S2_CTOR:@.*]]([[S2]]* {{.*}}, +// CHECK: define {{.*}} [[S2_DTOR:@.*]]([[S2]]* {{.*}}) +// No other call to the S2 constructor because it is not threadprivate +// CHECK-NOT: call {{.*}} [[S2_CTOR]]([[S2]]* +// CHECK-DEBUG: define {{.*}} [[S2_CTOR:@.*]]([[S2]]* {{.*}}, +// CHECK-DEBUG: define {{.*}} [[S2_DTOR:@.*]]([[S2]]* {{.*}}) +// No other call to the S2 constructor because it is not threadprivate +// CHECK-DEBUG-NOT: call {{.*}} [[S2_CTOR]]([[S2]]* +S1 arr_x[2][3] = { { 1, 2, 3 }, { 4, 5, 6 } }; +#pragma omp threadprivate(arr_x) +// CHECK: define internal {{.*}}i8*
[[ARR_X_CTOR:@\.__kmpc_global_ctor_\..*]](i8*) +// CHECK: store i8* %0, i8** [[ARG_ADDR:%.*]], +// CHECK: [[ARG:%.+]] = load i8** [[ARG_ADDR]] +// CHECK: [[RES:%.*]] = bitcast i8* [[ARG]] to [2 x [3 x [[S1]]]]* +// CHECK: [[ARR1:%.*]] = getelementptr inbounds [2 x [3 x [[S1]]]]* [[RES]], i{{.*}} 0, i{{.*}} 0 +// CHECK: [[ARR:%.*]] = getelementptr inbounds [3 x [[S1]]]* [[ARR1]], i{{.*}} 0, i{{.*}} 0 +// CHECK: invoke {{.*}} [[S1_CTOR]]([[S1]]* [[ARR]], [[INT]] {{.*}}1) +// CHECK: [[ARR_ELEMENT:%.*]] = getelementptr inbounds [[S1]]* [[ARR]], i{{.*}} 1 +// CHECK: invoke {{.*}} [[S1_CTOR]]([[S1]]* [[ARR_ELEMENT]], [[INT]] {{.*}}2) +// CHECK: [[ARR_ELEMENT2:%.*]] = getelementptr inbounds [[S1]]* [[ARR_ELEMENT]], i{{.*}} 1 +// CHECK: invoke {{.*}} [[S1_CTOR]]([[S1]]* [[ARR_ELEMENT2]], [[INT]] {{.*}}3) +// CHECK: [[ARR_ELEMENT3:%.*]] = getelementptr inbounds [3 x [[S1]]]* [[ARR1]], i{{.*}} 1 +// CHECK: [[ARR_:%.*]] = getelementptr inbounds [3 x [[S1]]]* [[ARR_ELEMENT3]], i{{.*}} 0, i{{.*}} 0 +// CHECK: invoke {{.*}} [[S1_CTOR]]([[S1]]* [[ARR_]], [[INT]] {{.*}}4) +// CHECK: [[ARR_ELEMENT:%.*]] = getelementptr inbounds [[S1]]* [[ARR_]], i{{.*}} 1 +// CHECK: invoke {{.*}} [[S1_CTOR]]([[S1]]* [[ARR_ELEMENT]], [[INT]] {{.*}}5) +// CHECK: [[ARR_ELEMENT2:%.*]] = getelementptr inbounds [[S1]]* [[ARR_ELEMENT]], i{{.*}} 1 +// CHECK: invoke {{.*}} [[S1_CTOR]]([[S1]]* [[ARR_ELEMENT2]], [[INT]] {{.*}}6) +// CHECK: [[ARG:%.+]] = load i8** [[ARG_ADDR]] +// CHECK: ret i8* [[ARG]] +// CHECK: } +// CHECK: define internal {{.*}}void [[ARR_X_DTOR:@\.__kmpc_global_dtor_\..*]](i8*) +// CHECK: store i8* %0, i8** [[ARG_ADDR:%.*]], +// CHECK: [[ARG:%.+]] = load i8** [[ARG_ADDR]] +// CHECK: [[ARR_BEGIN:%.*]] = bitcast i8* [[ARG]] to [[S1]]* +// CHECK-NEXT: [[ARR_CUR:%.*]] = getelementptr inbounds [[S1]]* [[ARR_BEGIN]], i{{.*}} 6 +// CHECK-NEXT: br label %[[ARR_LOOP:.*]] +// CHECK: {{.*}}[[ARR_LOOP]]{{.*}} +// CHECK-NEXT: [[ARR_ELEMENTPAST:%.*]] = phi [[S1]]* [ [[ARR_CUR]], {{.*}} ], [ 
[[ARR_ELEMENT:%.*]], {{.*}} ] +// CHECK-NEXT: [[ARR_ELEMENT:%.*]] = getelementptr inbounds [[S1]]* [[ARR_ELEMENTPAST]], i{{.*}} -1 +// CHECK-NEXT: invoke {{.*}} [[S1_DTOR]]([[S1]]* [[ARR_ELEMENT]]) +// CHECK: [[ARR_DONE:%.*]] = icmp eq [[S1]]* [[ARR_ELEMENT]], [[ARR_BEGIN]] +// CHECK-NEXT: br i1 [[ARR_DONE]], label %[[ARR_EXIT:.*]], label %[[ARR_LOOP]] +// CHECK: {{.*}}[[ARR_EXIT]]{{.*}} +// CHECK-NEXT: ret void +// CHECK: } +// CHECK: define internal {{.*}}void [[ARR_X_INIT:@\.__omp_threadprivate_init_\..*]]() +// CHECK: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i8* (i8*)* [[ARR_X_CTOR]], i8* (i8*, i8*)* null, void (i8*)* [[ARR_X_DTOR]]) +// CHECK-NEXT: ret void +// CHECK-NEXT: } +// CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] +// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 +// CHECK-DEBUG: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC2]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// CHECK-DEBUG: @__kmpc_global_thread_num +// CHECK-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i8* (i8*)* [[ARR_X_CTOR:@\.__kmpc_global_ctor_\..*]], i8* (i8*, i8*)* null, void (i8*)* [[ARR_X_DTOR:@\.__kmpc_global_dtor_\..*]]) +// CHECK-DEBUG: define internal {{.*}}i8* [[ARR_X_CTOR]](i8*) +// CHECK-DEBUG: } +// CHECK-DEBUG: define internal {{.*}}void [[ARR_X_DTOR]](i8*) +// CHECK-DEBUG: } +extern S5 gs3; +#pragma omp threadprivate(gs3) +// No call for S5 constructor because gs3 has just declaration, not a definition. 
+// CHECK-NOT: call {{.*}}([[S5]]* +// CHECK-DEBUG-NOT: call {{.*}}([[S5]]* + +template <class T> +struct ST { // class template whose static member `st` is marked threadprivate below + static T st; +#pragma omp threadprivate(st) +}; + +template <class T> +T ST<T>::st(23); // out-of-class definition of the static member; initialized with 23 + +// CHECK-LABEL: @main() +// CHECK-DEBUG-LABEL: @main() +int main() { + // CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] + int Res; + struct Smain { + int a; + double b, c; + Smain() + : a(0) { + } + Smain(int a) + : a(a) { + } + Smain(const Smain &s) { + a = 12 + s.a; + } + ~Smain() { + a = 0; + } + }; + + static Smain sm(gs1.a); +// CHECK: [[THREAD_NUM:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[DEFAULT_LOC]]) +// CHECK: call {{.*}}i{{.*}} @__cxa_guard_acquire +// CHECK: call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[DEFAULT_LOC]]) +// CHECK: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i8* (i8*)* [[SM_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[SM_DTOR:@\.__kmpc_global_dtor_\..+]]) +// CHECK: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS1]].cache.) 
+// CHECK-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* +// CHECK-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 +// CHECK-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]] +// CHECK-NEXT: invoke {{.*}} [[SMAIN_CTOR:.*]]([[SMAIN]]* [[SM]], [[INT]] {{.*}}[[GS1_A]]) +// CHECK: call {{.*}}void @__cxa_guard_release +// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 +// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC3]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// CHECK-DEBUG-NEXT: [[THREAD_NUM:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[KMPC_LOC_ADDR]]) +// CHECK-DEBUG: call {{.*}}i{{.*}} @__cxa_guard_acquire +// CHECK-DEBUG: call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[KMPC_LOC_ADDR]]) +// CHECK-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i8* (i8*)* [[SM_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[SM_DTOR:@\.__kmpc_global_dtor_\..+]]) +// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 +// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC3]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// CHECK-DEBUG: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** +// CHECK-DEBUG-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* +// CHECK-DEBUG-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 +// CHECK-DEBUG-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]] +// CHECK-DEBUG-NEXT: invoke {{.*}} [[SMAIN_CTOR:.*]]([[SMAIN]]* [[SM]], [[INT]] {{.*}}[[GS1_A]]) +// CHECK-DEBUG: call 
{{.*}}void @__cxa_guard_release +#pragma omp threadprivate(sm) + // CHECK: [[STATIC_S_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S3]]* [[STATIC_S]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[STATIC_S]].cache.) + // CHECK-NEXT: [[STATIC_S_ADDR:%.*]] = bitcast i8* [[STATIC_S_TEMP_ADDR]] to [[S3]]* + // CHECK-NEXT: [[STATIC_S_A_ADDR:%.*]] = getelementptr inbounds [[S3]]* [[STATIC_S_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-NEXT: [[STATIC_S_A:%.*]] = load [[INT]]* [[STATIC_S_A_ADDR]] + // CHECK-NEXT: store [[INT]] [[STATIC_S_A]], [[INT]]* [[RES_ADDR:[^,]+]] + // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 + // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC5]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + // CHECK-DEBUG-NEXT: [[STATIC_S_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S3]]* [[STATIC_S]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[STATIC_S_ADDR:%.*]] = bitcast i8* [[STATIC_S_TEMP_ADDR]] to [[S3]]* + // CHECK-DEBUG-NEXT: [[STATIC_S_A_ADDR:%.*]] = getelementptr inbounds [[S3]]* [[STATIC_S_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-DEBUG-NEXT: [[STATIC_S_A:%.*]] = load [[INT]]* [[STATIC_S_A_ADDR]] + // CHECK-DEBUG-NEXT: store [[INT]] [[STATIC_S_A]], [[INT]]* [[RES_ADDR:[^,]+]] + Res = Static::s.a; + // CHECK: [[SM_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[SM]].cache.) 
+ // CHECK-NEXT: [[SM_ADDR:%.*]] = bitcast i8* [[SM_TEMP_ADDR]] to [[SMAIN]]* + // CHECK-NEXT: [[SM_A_ADDR:%.*]] = getelementptr inbounds [[SMAIN]]* [[SM_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-NEXT: [[SM_A:%.*]] = load [[INT]]* [[SM_A_ADDR]] + // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[SM_A]] + // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 + // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC6]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + // CHECK-DEBUG-NEXT: [[SM_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[SM_ADDR:%.*]] = bitcast i8* [[SM_TEMP_ADDR]] to [[SMAIN]]* + // CHECK-DEBUG-NEXT: [[SM_A_ADDR:%.*]] = getelementptr inbounds [[SMAIN]]* [[SM_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-DEBUG-NEXT: [[SM_A:%.*]] = load [[INT]]* [[SM_A_ADDR]] + // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[SM_A]] + // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + Res += sm.a; + // CHECK: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS1]].cache.) 
+ // CHECK-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* + // CHECK-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]] + // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS1_A]] + // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 + // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC7]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + // CHECK-DEBUG-NEXT: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* + // CHECK-DEBUG-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-DEBUG-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]] + // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS1_A]] + // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + Res += gs1.a; + // CHECK: [[GS2_A:%.*]] = load [[INT]]* getelementptr inbounds ([[S2]]* [[GS2]], i{{.*}} 0, i{{.*}} 0) + // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS2_A]] + // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-DEBUG: [[GS2_A:%.*]] = load [[INT]]* getelementptr inbounds ([[S2]]* [[GS2]], i{{.*}} 0, i{{.*}} 0) + // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS2_A]] + // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + Res += 
gs2.a; + // CHECK: [[GS3_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS3]].cache.) + // CHECK-NEXT: [[GS3_ADDR:%.*]] = bitcast i8* [[GS3_TEMP_ADDR]] to [[S5]]* + // CHECK-NEXT: [[GS3_A_ADDR:%.*]] = getelementptr inbounds [[S5]]* [[GS3_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-NEXT: [[GS3_A:%.*]] = load [[INT]]* [[GS3_A_ADDR]] + // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS3_A]] + // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 + // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC8]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + // CHECK-DEBUG-NEXT: [[GS3_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[GS3_ADDR:%.*]] = bitcast i8* [[GS3_TEMP_ADDR]] to [[S5]]* + // CHECK-DEBUG-NEXT: [[GS3_A_ADDR:%.*]] = getelementptr inbounds [[S5]]* [[GS3_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-DEBUG-NEXT: [[GS3_A:%.*]] = load [[INT]]* [[GS3_A_ADDR]] + // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS3_A]] + // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + Res += gs3.a; + // CHECK: [[ARR_X_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ARR_X]].cache.) 
+ // CHECK-NEXT: [[ARR_X_ADDR:%.*]] = bitcast i8* [[ARR_X_TEMP_ADDR]] to [2 x [3 x [[S1]]]]* + // CHECK-NEXT: [[ARR_X_1_ADDR:%.*]] = getelementptr inbounds [2 x [3 x [[S1]]]]* [[ARR_X_ADDR]], i{{.*}} 0, i{{.*}} 1 + // CHECK-NEXT: [[ARR_X_1_1_ADDR:%.*]] = getelementptr inbounds [3 x [[S1]]]* [[ARR_X_1_ADDR]], i{{.*}} 0, i{{.*}} 1 + // CHECK-NEXT: [[ARR_X_1_1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[ARR_X_1_1_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-NEXT: [[ARR_X_1_1_A:%.*]] = load [[INT]]* [[ARR_X_1_1_A_ADDR]] + // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]] + // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 + // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC9]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + // CHECK-DEBUG-NEXT: [[ARR_X_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[ARR_X_ADDR:%.*]] = bitcast i8* [[ARR_X_TEMP_ADDR]] to [2 x [3 x [[S1]]]]* + // CHECK-DEBUG-NEXT: [[ARR_X_1_ADDR:%.*]] = getelementptr inbounds [2 x [3 x [[S1]]]]* [[ARR_X_ADDR]], i{{.*}} 0, i{{.*}} 1 + // CHECK-DEBUG-NEXT: [[ARR_X_1_1_ADDR:%.*]] = getelementptr inbounds [3 x [[S1]]]* [[ARR_X_1_ADDR]], i{{.*}} 0, i{{.*}} 1 + // CHECK-DEBUG-NEXT: [[ARR_X_1_1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[ARR_X_1_1_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-DEBUG-NEXT: [[ARR_X_1_1_A:%.*]] = load [[INT]]* [[ARR_X_1_1_A_ADDR]] + // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]] + // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + Res += arr_x[1][1].a; + // CHECK: 
[[ST_INT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_INT_ST]].cache.) + // CHECK-NEXT: [[ST_INT_ST_ADDR:%.*]] = bitcast i8* [[ST_INT_ST_TEMP_ADDR]] to [[INT]]* + // CHECK-NEXT: [[ST_INT_ST_VAL:%.*]] = load [[INT]]* [[ST_INT_ST_ADDR]] + // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]] + // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 + // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC10]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + // CHECK-DEBUG-NEXT: [[ST_INT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[ST_INT_ST_ADDR:%.*]] = bitcast i8* [[ST_INT_ST_TEMP_ADDR]] to [[INT]]* + // CHECK-DEBUG-NEXT: [[ST_INT_ST_VAL:%.*]] = load [[INT]]* [[ST_INT_ST_ADDR]] + // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]] + // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + Res += ST<int>::st; + // CHECK: [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_FLOAT_ST]].cache.) 
+ // CHECK-NEXT: [[ST_FLOAT_ST_ADDR:%.*]] = bitcast i8* [[ST_FLOAT_ST_TEMP_ADDR]] to float* + // CHECK-NEXT: [[ST_FLOAT_ST_VAL:%.*]] = load float* [[ST_FLOAT_ST_ADDR]] + // CHECK-NEXT: [[FLOAT_TO_INT_CONV:%.*]] = fptosi float [[ST_FLOAT_ST_VAL]] to [[INT]] + // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]] + // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 + // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC11]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_ADDR:%.*]] = bitcast i8* [[ST_FLOAT_ST_TEMP_ADDR]] to float* + // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_VAL:%.*]] = load float* [[ST_FLOAT_ST_ADDR]] + // CHECK-DEBUG-NEXT: [[FLOAT_TO_INT_CONV:%.*]] = fptosi float [[ST_FLOAT_ST_VAL]] to [[INT]] + // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]] + // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + Res += static_cast<int>(ST<float>::st); + // CHECK: [[ST_S4_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[ST_S4_ST]].cache.) 
+ // CHECK-NEXT: [[ST_S4_ST_ADDR:%.*]] = bitcast i8* [[ST_S4_ST_TEMP_ADDR]] to [[S4]]* + // CHECK-NEXT: [[ST_S4_ST_A_ADDR:%.*]] = getelementptr inbounds [[S4]]* [[ST_S4_ST_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-NEXT: [[ST_S4_ST_A:%.*]] = load [[INT]]* [[ST_S4_ST_A_ADDR]] + // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_S4_ST_A]] + // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 + // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC12]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + // CHECK-DEBUG-NEXT: [[ST_S4_ST_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[ST_S4_ST_ADDR:%.*]] = bitcast i8* [[ST_S4_ST_TEMP_ADDR]] to [[S4]]* + // CHECK-DEBUG-NEXT: [[ST_S4_ST_A_ADDR:%.*]] = getelementptr inbounds [[S4]]* [[ST_S4_ST_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-DEBUG-NEXT: [[ST_S4_ST_A:%.*]] = load [[INT]]* [[ST_S4_ST_A_ADDR]] + // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_S4_ST_A]] + // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + Res += ST<S4>::st.a; + // CHECK: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-NEXT: ret [[INT]] [[RES]] + // CHECK-DEBUG: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-DEBUG-NEXT: ret [[INT]] [[RES]] + return Res; +} +// CHECK: } + +// CHECK: define internal {{.*}}i8* [[SM_CTOR]](i8*) +// CHECK: [[THREAD_NUM:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[DEFAULT_LOC]]) +// CHECK: store i8* %0, i8** [[ARG_ADDR:%.*]], +// CHECK: [[ARG:%.+]] = load i8** [[ARG_ADDR]] +// CHECK: [[RES:%.*]] = bitcast i8* [[ARG]] to [[SMAIN]]* +// 
CHECK: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[GS1]].cache.) +// CHECK-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* +// CHECK-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 +// CHECK-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]] +// CHECK-NEXT: call {{.*}} [[SMAIN_CTOR:@.+]]([[SMAIN]]* [[RES]], [[INT]] {{.*}}[[GS1_A]]) +// CHECK: [[ARG:%.+]] = load i8** [[ARG_ADDR]] +// CHECK-NEXT: ret i8* [[ARG]] +// CHECK-NEXT: } +// CHECK: define {{.*}} [[SMAIN_CTOR]]([[SMAIN]]* {{.*}}, +// CHECK: define internal {{.*}}void [[SM_DTOR]](i8*) +// CHECK: store i8* %0, i8** [[ARG_ADDR:%.*]], +// CHECK: [[ARG:%.+]] = load i8** [[ARG_ADDR]] +// CHECK: [[RES:%.*]] = bitcast i8* [[ARG]] to [[SMAIN]]* +// CHECK-NEXT: call {{.*}} [[SMAIN_DTOR:@.+]]([[SMAIN]]* [[RES]]) +// CHECK-NEXT: ret void +// CHECK-NEXT: } +// CHECK: define {{.*}} [[SMAIN_DTOR]]([[SMAIN]]* {{.*}}) +// CHECK-DEBUG: define internal {{.*}}i8* [[SM_CTOR]](i8*) +// CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] +// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 +// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC3]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// CHECK-DEBUG-NEXT: [[THREAD_NUM:%.+]] = call {{.*}}i32 @__kmpc_global_thread_num([[IDENT]]* [[KMPC_LOC_ADDR]]) +// CHECK-DEBUG: store i8* %0, i8** [[ARG_ADDR:%.*]], +// CHECK-DEBUG: [[ARG:%.+]] = load i8** [[ARG_ADDR]] +// CHECK-DEBUG: [[RES:%.*]] = bitcast i8* [[ARG]] to [[SMAIN]]* +// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 +// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC3]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// 
CHECK-DEBUG: [[GS1_TEMP_ADDR:%.*]] = call {{.*}}i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** +// CHECK-DEBUG-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* +// CHECK-DEBUG-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 +// CHECK-DEBUG-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]] +// CHECK-DEBUG-NEXT: call {{.*}} [[SMAIN_CTOR:@.+]]([[SMAIN]]* [[RES]], [[INT]] {{.*}}[[GS1_A]]) +// CHECK-DEBUG: [[ARG:%.+]] = load i8** [[ARG_ADDR]] +// CHECK-DEBUG-NEXT: ret i8* [[ARG]] +// CHECK-DEBUG-NEXT: } +// CHECK-DEBUG: define {{.*}} [[SMAIN_CTOR]]([[SMAIN]]* {{.*}}, +// CHECK-DEBUG: define internal {{.*}} [[SM_DTOR:@.+]](i8*) +// CHECK-DEBUG: call {{.*}} [[SMAIN_DTOR:@.+]]([[SMAIN]]* +// CHECK-DEBUG: } +// CHECK-DEBUG: define {{.*}} [[SMAIN_DTOR]]([[SMAIN]]* {{.*}}) + +// CHECK: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i8* (i8*)* [[ST_S4_ST_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[ST_S4_ST_DTOR:@\.__kmpc_global_dtor_\..+]]) +// CHECK: define internal {{.*}}i8* [[ST_S4_ST_CTOR]](i8*) +// CHECK: store i8* %0, i8** [[ARG_ADDR:%.*]], +// CHECK: [[ARG:%.+]] = load i8** [[ARG_ADDR]] +// CHECK: [[RES:%.*]] = bitcast i8* [[ARG]] to [[S4]]* +// CHECK-NEXT: call {{.*}} [[S4_CTOR:@.+]]([[S4]]* [[RES]], {{.*}} 23) +// CHECK: [[ARG:%.+]] = load i8** [[ARG_ADDR]] +// CHECK-NEXT: ret i8* [[ARG]] +// CHECK-NEXT: } +// CHECK: define {{.*}} [[S4_CTOR]]([[S4]]* {{.*}}, +// CHECK: define internal {{.*}}void [[ST_S4_ST_DTOR]](i8*) +// CHECK: store i8* %0, i8** [[ARG_ADDR:%.*]], +// CHECK: [[ARG:%.+]] = load i8** [[ARG_ADDR]] +// CHECK: [[RES:%.*]] = bitcast i8* [[ARG]] to [[S4]]* +// CHECK-NEXT: call {{.*}} [[S4_DTOR:@.+]]([[S4]]* [[RES]]) +// CHECK-NEXT: ret void +// CHECK-NEXT: } +// CHECK: define {{.*}} 
[[S4_DTOR]]([[S4]]* {{.*}}) +// CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] +// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 +// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC13]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// CHECK-DEBUG: @__kmpc_global_thread_num +// CHECK-DEBUG: call {{.*}}void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i8* (i8*)* [[ST_S4_ST_CTOR:@\.__kmpc_global_ctor_\..+]], i8* (i8*, i8*)* null, void (i8*)* [[ST_S4_ST_DTOR:@\.__kmpc_global_dtor_\..+]]) +// CHECK-DEBUG: define internal {{.*}}i8* [[ST_S4_ST_CTOR]](i8*) +// CHECK-DEBUG: } +// CHECK-DEBUG: define {{.*}} [[S4_CTOR:@.*]]([[S4]]* {{.*}}, +// CHECK-DEBUG: define internal {{.*}}void [[ST_S4_ST_DTOR]](i8*) +// CHECK-DEBUG: } +// CHECK-DEBUG: define {{.*}} [[S4_DTOR:@.*]]([[S4]]* {{.*}}) + +// CHECK: define internal {{.*}}void {{@.*}}() +// CHECK-DAG: call {{.*}}void [[GS1_INIT]]() +// CHECK-DAG: call {{.*}}void [[ARR_X_INIT]]() +// CHECK: ret void +// CHECK-DEBUG: define internal {{.*}}void {{@.*}}() +// CHECK-DEBUG: ret void +#endif +