Index: include/clang/AST/DeclOpenMP.h =================================================================== --- include/clang/AST/DeclOpenMP.h +++ include/clang/AST/DeclOpenMP.h @@ -19,6 +19,7 @@ #include "llvm/ADT/ArrayRef.h" namespace clang { +class Expr; /// \brief This represents '#pragma omp threadprivate ...' directive. /// For example, in the following, both 'a' and 'A::b' are threadprivate: Index: lib/AST/ASTContext.cpp =================================================================== --- lib/AST/ASTContext.cpp +++ lib/AST/ASTContext.cpp @@ -7895,7 +7895,9 @@ // We never need to emit an uninstantiated function template. if (FD->getTemplatedKind() == FunctionDecl::TK_FunctionTemplate) return false; - } else + } else if (isa(D)) + return true; + else return false; // If this is a member of a class template, we do not need to emit it. Index: lib/CodeGen/CGDecl.cpp =================================================================== --- lib/CodeGen/CGDecl.cpp +++ lib/CodeGen/CGDecl.cpp @@ -19,6 +19,7 @@ #include "clang/AST/CharUnits.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclOpenMP.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" @@ -86,11 +87,12 @@ case Decl::StaticAssert: // static_assert(X, ""); [C++0x] case Decl::Label: // __label__ x; case Decl::Import: - case Decl::OMPThreadPrivate: case Decl::Empty: // None of these decls require codegen support. 
return; - + case Decl::OMPThreadPrivate: + CGM.EmitOMPThreadPrivateDecl(cast(&D)); + return; case Decl::NamespaceAlias: if (CGDebugInfo *DI = getDebugInfo()) DI->EmitNamespaceAlias(cast(D)); Index: lib/CodeGen/CGDeclCXX.cpp =================================================================== --- lib/CodeGen/CGDeclCXX.cpp +++ lib/CodeGen/CGDeclCXX.cpp @@ -155,12 +155,6 @@ EmitStoreOfScalar(RV.getScalarVal(), DeclPtr, false, Alignment, T); } -static llvm::Function * -CreateGlobalInitOrDestructFunction(CodeGenModule &CGM, - llvm::FunctionType *ty, - const Twine &name, - bool TLS = false); - /// Create a stub function, suitable for being passed to atexit, /// which passes the given address to the given destructor function. static llvm::Constant *createAtExitStub(CodeGenModule &CGM, const VarDecl &VD, @@ -174,7 +168,7 @@ CGM.getCXXABI().getMangleContext().mangleDynamicAtExitDestructor(&VD, Out); } llvm::Function *fn = - CreateGlobalInitOrDestructFunction(CGM, ty, FnName.str()); + CGM.CreateGlobalInitOrDestructFunction(ty, FnName.str()); CodeGenFunction CGF(CGM); @@ -226,31 +220,29 @@ CGM.getCXXABI().EmitGuardedInit(*this, D, DeclPtr, PerformInit); } -static llvm::Function * -CreateGlobalInitOrDestructFunction(CodeGenModule &CGM, - llvm::FunctionType *FTy, - const Twine &Name, bool TLS) { +llvm::Function *CodeGenModule::CreateGlobalInitOrDestructFunction( + llvm::FunctionType *FTy, const Twine &Name, bool TLS) { llvm::Function *Fn = llvm::Function::Create(FTy, llvm::GlobalValue::InternalLinkage, - Name, &CGM.getModule()); - if (!CGM.getLangOpts().AppleKext && !TLS) { + Name, &getModule()); + if (!getLangOpts().AppleKext && !TLS) { // Set the section if needed. 
if (const char *Section = - CGM.getTarget().getStaticInitSectionSpecifier()) + getTarget().getStaticInitSectionSpecifier()) Fn->setSection(Section); } - Fn->setCallingConv(CGM.getRuntimeCC()); + Fn->setCallingConv(getRuntimeCC()); - if (!CGM.getLangOpts().Exceptions) + if (!getLangOpts().Exceptions) Fn->setDoesNotThrow(); - if (!CGM.getSanitizerBlacklist().isIn(*Fn)) { - if (CGM.getLangOpts().Sanitize.Address) + if (!getSanitizerBlacklist().isIn(*Fn)) { + if (getLangOpts().Sanitize.Address) Fn->addFnAttr(llvm::Attribute::SanitizeAddress); - if (CGM.getLangOpts().Sanitize.Thread) + if (getLangOpts().Sanitize.Thread) Fn->addFnAttr(llvm::Attribute::SanitizeThread); - if (CGM.getLangOpts().Sanitize.Memory) + if (getLangOpts().Sanitize.Memory) Fn->addFnAttr(llvm::Attribute::SanitizeMemory); } @@ -296,7 +288,7 @@ // Create a variable initialization function. llvm::Function *Fn = - CreateGlobalInitOrDestructFunction(*this, FTy, FnName.str()); + CreateGlobalInitOrDestructFunction(FTy, FnName.str()); auto *ISA = D->getAttr(); CodeGenFunction(*this).GenerateCXXGlobalVarDeclInitFunc(Fn, D, Addr, @@ -350,7 +342,7 @@ if (!CXXThreadLocalInits.empty()) { // Generate a guarded initialization function. llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false); - InitFn = CreateGlobalInitOrDestructFunction(*this, FTy, "__tls_init", + InitFn = CreateGlobalInitOrDestructFunction(FTy, "__tls_init", /*TLS*/ true); llvm::GlobalVariable *Guard = new llvm::GlobalVariable( getModule(), Int8Ty, false, llvm::GlobalVariable::InternalLinkage, @@ -399,7 +391,7 @@ // Priority is always <= 65535 (enforced by sema). 
PrioritySuffix = std::string(6-PrioritySuffix.size(), '0')+PrioritySuffix; llvm::Function *Fn = - CreateGlobalInitOrDestructFunction(*this, FTy, + CreateGlobalInitOrDestructFunction(FTy, "_GLOBAL__I_" + PrioritySuffix); for (; I < PrioE; ++I) @@ -423,7 +415,7 @@ FileName[i] = '_'; } llvm::Function *Fn = CreateGlobalInitOrDestructFunction( - *this, FTy, llvm::Twine("_GLOBAL__sub_I_", FileName)); + FTy, llvm::Twine("_GLOBAL__sub_I_", FileName)); CodeGenFunction(*this).GenerateCXXGlobalInitFunc(Fn, CXXGlobalInits); AddGlobalCtor(Fn); @@ -440,7 +432,7 @@ // Create our global destructor function. llvm::Function *Fn = - CreateGlobalInitOrDestructFunction(*this, FTy, "_GLOBAL__D_a"); + CreateGlobalInitOrDestructFunction(FTy, "_GLOBAL__D_a"); CodeGenFunction(*this).GenerateCXXGlobalDtorsFunc(Fn, CXXGlobalDtors); AddGlobalDtor(Fn); @@ -562,7 +554,7 @@ getContext().VoidTy, args, FunctionType::ExtInfo(), /*variadic=*/false); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); llvm::Function *fn = - CreateGlobalInitOrDestructFunction(CGM, FTy, "__cxx_global_array_dtor"); + CGM.CreateGlobalInitOrDestructFunction(FTy, "__cxx_global_array_dtor"); StartFunction(VD, getContext().VoidTy, fn, FI, args); Index: lib/CodeGen/CGExpr.cpp =================================================================== --- lib/CodeGen/CGExpr.cpp +++ lib/CodeGen/CGExpr.cpp @@ -16,6 +16,7 @@ #include "CGCall.h" #include "CGDebugInfo.h" #include "CGObjCRuntime.h" +#include "CGOpenMPRuntime.h" #include "CGRecordLayout.h" #include "CodeGenModule.h" #include "TargetInfo.h" @@ -1751,6 +1752,14 @@ return CGF.Builder.CreateBitCast(V, IRType->getPointerTo(AS), Name); } +static LValue EmitThreadPrivateVarDeclLValue( + CodeGenFunction &CGF, const VarDecl *VD, QualType T, llvm::Value *V, + llvm::Type *RealVarTy, CharUnits Alignment, SourceLocation Loc) { + V = CGF.CGM.getOpenMPRuntime().getOMPAddrOfThreadPrivate(CGF, V, VD, Loc); + V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy); + return 
CGF.MakeAddrLValue(V, T, Alignment); +} + static LValue EmitGlobalVarDeclLValue(CodeGenFunction &CGF, const Expr *E, const VarDecl *VD) { QualType T = E->getType(); @@ -1764,6 +1773,11 @@ V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy); CharUnits Alignment = CGF.getContext().getDeclAlign(VD); LValue LV; + if (CGF.getLangOpts().OpenMP && + CGF.CGM.getOpenMPRuntime().isOMPThreadPrivateDecl(VD)) { + return EmitThreadPrivateVarDeclLValue(CGF, VD, T, V, RealVarTy, Alignment, + E->getExprLoc()); + } if (VD->getType()->isReferenceType()) { llvm::LoadInst *LI = CGF.Builder.CreateLoad(V); LI->setAlignment(Alignment.getQuantity()); @@ -1876,6 +1890,13 @@ if (!V && VD->isStaticLocal()) V = CGM.getStaticLocalDeclAddress(VD); + // Check if variable is threadprivate. + if (V && getLangOpts().OpenMP && + CGM.getOpenMPRuntime().isOMPThreadPrivateDecl(VD)) + return EmitThreadPrivateVarDeclLValue( + *this, VD, T, V, getTypes().ConvertTypeForMem(VD->getType()), + Alignment, E->getExprLoc()); + // Use special handling for lambdas. 
if (!V) { if (FieldDecl *FD = LambdaCaptureFields.lookup(VD)) { Index: lib/CodeGen/CGOpenMPRuntime.h =================================================================== --- lib/CodeGen/CGOpenMPRuntime.h +++ lib/CodeGen/CGOpenMPRuntime.h @@ -16,6 +16,7 @@ #include "clang/AST/Type.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" #include "llvm/IR/Type.h" #include "llvm/IR/Value.h" @@ -34,6 +35,7 @@ } // namespace llvm namespace clang { +class VarDecl; namespace CodeGen { @@ -64,11 +66,17 @@ OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140 }; enum OpenMPRTLFunction { - // Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro - // microtask, ...); + /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, + /// kmpc_micro microtask, ...); OMPRTL__kmpc_fork_call, - // Call to kmp_int32 kmpc_global_thread_num(ident_t *loc); - OMPRTL__kmpc_global_thread_num + /// \brief Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc); + OMPRTL__kmpc_global_thread_num, + /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc, + /// kmp_int32 global_tid, void *data, size_t size, void ***cache); + OMPRTL__kmpc_threadprivate_cached, + /// \brief Call to void __kmpc_threadprivate_register( ident_t *, + /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); + OMPRTL__kmpc_threadprivate_register }; private: @@ -132,17 +140,15 @@ typedef llvm::DenseMap OpenMPLocMapTy; OpenMPLocMapTy OpenMPLocMap; /// \brief Map of local gtid and functions. - typedef llvm::DenseMap OpenMPGtidMapTy; - OpenMPGtidMapTy OpenMPGtidMap; - -public: - explicit CGOpenMPRuntime(CodeGenModule &CGM); - ~CGOpenMPRuntime() {} - - /// \brief Cleans up references to the objects in finished function. - /// \param CGF Reference to finished CodeGenFunction. 
- /// - void FunctionFinished(CodeGenFunction &CGF); + typedef llvm::DenseMap OpenMPThreadIDMapTy; + OpenMPThreadIDMapTy OpenMPThreadIDMap; + /// \brief Map of declarations marked as threadprivate along with locations. + typedef llvm::DenseMap + OpenMPThreadPrivateVarsTy; + OpenMPThreadPrivateVarsTy OpenMPThreadPrivateVars; + /// \brief Map of threadprivate vars and corresponding cache storages. + typedef llvm::StringMap OpenMPThreadPrivateMapTy; + OpenMPThreadPrivateMapTy OpenMPThreadPrivateMap; /// \brief Emits object of ident_t type with info for source location. /// \param CGF Reference to current CodeGenFunction. @@ -170,6 +176,79 @@ /// \param Function OpenMP runtime function. /// \return Specified function. llvm::Constant *CreateRuntimeFunction(OpenMPRTLFunction Function); + + /// \brief If the specified mangled name is not in the module, create and + /// return threadprivate cache object. + /// \param VD Threadprivate variable. + /// \param Addr Original global variable. + /// \return Cache variable for the specified threadprivate. + llvm::Constant *getOrCreateThreadPrivateCache(const VarDecl *VD, + llvm::Value *Addr); + +public: + explicit CGOpenMPRuntime(CodeGenModule &CGM); + virtual ~CGOpenMPRuntime() {} + + /// \brief Cleans up references to the objects in finished function. + /// \param CGF Reference to finished CodeGenFunction. + /// + void FunctionFinished(CodeGenFunction &CGF); + + /// \brief Emits code for parallel call of the \a OutlinedFn with variables + /// captured in a record whose address is stored in \a CapturedStruct. + /// \param CGF Reference to current CodeGenFunction. + /// \param Loc Clang source location. + /// \param OutlinedFn Outlined function to be run in parallel threads. + /// \param CapturedStruct A pointer to the record with the references to + /// variables used in \a OutlinedFn function.
+ /// + virtual void EmitOMPParallelCall(CodeGenFunction &CGF, SourceLocation Loc, + llvm::Value *OutlinedFn, + llvm::Value *CapturedStruct); + + /// \brief Registers variable as threadprivate. + /// \param VD Threadprivate variable. + /// \param Loc Location of threadprivate variable. + void addOMPThreadPrivateDecl(const VarDecl *VD, SourceLocation Loc); + + /// \brief Checks if the specified declaration or any redeclarations are + /// marked as threadprivate. + /// \param VD Variable declaration to be checked. + /// \return true if any redeclarations of the specified declaration are marked + /// as threadprivate. + bool isOMPThreadPrivateDecl(const VarDecl *VD) const; + + /// \brief Gets the source location for the variable if it is marked as + /// threadprivate. + /// \param VD Variable declaration to be checked. + /// \return Real location of threadprivate declaration if any redeclarations + /// of the specified declaration are marked as threadprivate, SourceLocation() + /// otherwise. + SourceLocation getOMPThreadPrivateDeclLoc(const VarDecl *VD) const; + + /// \brief Returns address of the threadprivate variable for the current + /// thread. + /// \param CGF Reference to current CodeGenFunction. + /// \param Addr Address of the original variable. + /// \param VD Threadprivate variable. + /// \param Loc Location of the reference to threadprivate var. + /// \return Address of the threadprivate variable for the current thread. + virtual llvm::Value *getOMPAddrOfThreadPrivate(CodeGenFunction &CGF, + llvm::Value *Addr, + const VarDecl *VD, + SourceLocation Loc); + + /// \brief Emits the function which registers constructor/destructor for + /// the specified threadprivate variable. + /// \param VD A threadprivate variable. + /// \param Addr The address of the original variable. + /// \param PerformInit Need to create initialization function. + /// \param PerformDestroy Need to create destroy function. + /// \param Loc Threadprivate declaration location.
+ virtual llvm::Constant * + EmitOMPCXXThreadPrivateInitFunction(const VarDecl &VD, llvm::Constant *Addr, + bool PerformInit, bool PerformDestroy, + SourceLocation Loc); }; } // namespace CodeGen } // namespace clang Index: lib/CodeGen/CGOpenMPRuntime.cpp =================================================================== --- lib/CodeGen/CGOpenMPRuntime.cpp +++ lib/CodeGen/CGOpenMPRuntime.cpp @@ -123,10 +123,10 @@ SourceLocation Loc) { assert(CGF.CurFn && "No function in current CodeGenFunction."); - llvm::Value *GTid = nullptr; - OpenMPGtidMapTy::iterator I = OpenMPGtidMap.find(CGF.CurFn); - if (I != OpenMPGtidMap.end()) { - GTid = I->second; + llvm::Value *ThreadID = nullptr; + OpenMPThreadIDMapTy::iterator I = OpenMPThreadIDMap.find(CGF.CurFn); + if (I != OpenMPThreadIDMap.end()) { + ThreadID = I->second; } else { // Check if current function is a function which has first parameter // with type int32 and name ".global_tid.". @@ -144,24 +144,24 @@ CGF.CurFn->arg_begin()->getName() == ".global_tid.") { CGBuilderTy::InsertPointGuard IPG(CGF.Builder); CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); - GTid = CGF.Builder.CreateLoad(CGF.CurFn->arg_begin()); + ThreadID = CGF.Builder.CreateLoad(CGF.CurFn->arg_begin()); } else { // Generate "int32 .kmpc_global_thread_num.addr;" CGBuilderTy::InsertPointGuard IPG(CGF.Builder); CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt); llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc)}; - GTid = CGF.EmitRuntimeCall( + ThreadID = CGF.EmitRuntimeCall( CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), Args); } - OpenMPGtidMap[CGF.CurFn] = GTid; + OpenMPThreadIDMap[CGF.CurFn] = ThreadID; } - return GTid; + return ThreadID; } void CGOpenMPRuntime::FunctionFinished(CodeGenFunction &CGF) { assert(CGF.CurFn && "No function in current CodeGenFunction."); - if (OpenMPGtidMap.count(CGF.CurFn)) - OpenMPGtidMap.erase(CGF.CurFn); + if (OpenMPThreadIDMap.count(CGF.CurFn)) + OpenMPThreadIDMap.erase(CGF.CurFn); if 
(OpenMPLocMap.count(CGF.CurFn)) OpenMPLocMap.erase(CGF.CurFn); } @@ -184,7 +184,7 @@ llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, getKmpc_MicroPointerTy()}; llvm::FunctionType *FnTy = - llvm::FunctionType::get(CGM.VoidTy, TypeParams, true); + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call"); break; } @@ -192,10 +192,206 @@ // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc); llvm::Type *TypeParams[] = {getIdentTyPointerTy()}; llvm::FunctionType *FnTy = - llvm::FunctionType::get(CGM.Int32Ty, TypeParams, false); + llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num"); break; } + case OMPRTL__kmpc_threadprivate_cached: { + // Build void *__kmpc_threadprivate_cached(ident_t *loc, + // kmp_int32 global_tid, void *data, size_t size, void ***cache); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, + CGM.VoidPtrTy, CGM.SizeTy, + CGM.VoidPtrTy->getPointerTo()->getPointerTo()}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached"); + break; + } + case OMPRTL__kmpc_threadprivate_register: { + // Build void __kmpc_threadprivate_register(ident_t *, void *data, + // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor); + // typedef void *(*kmpc_ctor)(void *); + auto KmpcCtorTy = + llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, + /*isVarArg*/ false)->getPointerTo(); + // typedef void *(*kmpc_cctor)(void *, void *); + llvm::Type *KmpcCCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; + auto KmpcCCtorTy = + llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCCtorTyArgs, + /*isVarArg*/ false)->getPointerTo(); + // typedef void (*kmpc_dtor)(void *); + auto KmpcDtorTy = + llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false) + ->getPointerTo(); +
llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy, + KmpcCCtorTy, KmpcDtorTy}; + auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs, + /*isVarArg*/ false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register"); + break; + } } return RTLFn; } + +void CGOpenMPRuntime::EmitOMPParallelCall(CodeGenFunction &CGF, + SourceLocation Loc, + llvm::Value *OutlinedFn, + llvm::Value *CapturedStruct) { + // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/) + llvm::Value *Args[] = { + EmitOpenMPUpdateLocation(CGF, Loc), + CGF.Builder.getInt32(1), // Number of arguments after 'microtask' argument + // (there is only one additional argument - 'context') + CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()), + CGF.EmitCastToVoidPtr(CapturedStruct)}; + auto RTLFn = CreateRuntimeFunction(CGOpenMPRuntime::OMPRTL__kmpc_fork_call); + CGF.EmitRuntimeCall(RTLFn, Args); +} + +void CGOpenMPRuntime::addOMPThreadPrivateDecl(const VarDecl *VD, + SourceLocation Loc) { + OpenMPThreadPrivateVars[VD] = Loc; +} + +bool CGOpenMPRuntime::isOMPThreadPrivateDecl(const VarDecl *VD) const { + auto CurDecl = VD->getMostRecentDecl(); + while (CurDecl) { + if (OpenMPThreadPrivateVars.count(CurDecl) > 0) + return true; + CurDecl = CurDecl->getPreviousDecl(); + } + return false; +} + +SourceLocation +CGOpenMPRuntime::getOMPThreadPrivateDeclLoc(const VarDecl *VD) const { + auto CurDecl = VD->getMostRecentDecl(); + while (CurDecl) { + if (OpenMPThreadPrivateVars.count(CurDecl) > 0) + return OpenMPThreadPrivateVars.lookup(CurDecl); + CurDecl = CurDecl->getPreviousDecl(); + } + return SourceLocation(); +} + +llvm::Constant * +CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD, + llvm::Value *Addr) { + StringRef MangledName = CGM.getMangledName(VD); + // Lookup the entry, lazily creating it if necessary. 
+ auto &Entry = OpenMPThreadPrivateMap[MangledName]; + if (Entry == nullptr) { + // Create cache memory for threadprivate variable void **Var.cache; + Entry = new llvm::GlobalVariable( + CGM.getModule(), CGM.Int8PtrPtrTy, /*IsConstant*/ false, + llvm::GlobalValue::CommonLinkage, + llvm::Constant::getNullValue(CGM.Int8PtrPtrTy), ".cache.", + dyn_cast(Addr)); + } + + return Entry; +} + +llvm::Value *CGOpenMPRuntime::getOMPAddrOfThreadPrivate(CodeGenFunction &CGF, + llvm::Value *Addr, + const VarDecl *VD, + SourceLocation Loc) { + auto VarTy = Addr->getType()->getPointerElementType(); + llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc), + GetOpenMPGlobalThreadNum(CGF, Loc), + CGF.Builder.CreatePointerCast(Addr, CGM.Int8PtrTy), + CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), + getOrCreateThreadPrivateCache(VD, Addr)}; + return CGF.EmitRuntimeCall( + CreateRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args); +} + +llvm::Constant *CGOpenMPRuntime::EmitOMPCXXThreadPrivateInitFunction( + const VarDecl &VD, llvm::Constant *Addr, bool PerformInit, + bool PerformDestroy, SourceLocation Loc) { + QualType ASTTy = VD.getType(); + + llvm::Value *Ctor = nullptr, *CCtor = nullptr, *Dtor = nullptr; + if (PerformInit) { + CodeGenFunction CtorCGF(CGM); + FunctionArgList Args; + ImplicitParamDecl Dst(CGM.getContext(), /*DC*/ nullptr, SourceLocation(), + /*Id*/ nullptr, CGM.getContext().VoidPtrTy); + Args.push_back(&Dst); + + const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( + CGM.getContext().VoidPtrTy, Args, FunctionType::ExtInfo(), + /*isVariadic*/ false); + auto FTy = CGM.getTypes().GetFunctionType(FI); + auto Fn = + CGM.CreateGlobalInitOrDestructFunction(FTy, ".__kmpc_global_ctor_."); + CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, + Args, SourceLocation()); + auto Arg = CtorCGF.EmitScalarConversion( + Fn->arg_begin(), CGM.getContext().VoidPtrTy, + CGM.getContext().getPointerType(ASTTy)); + 
CtorCGF.EmitAnyExprToMem(VD.getAnyInitializer(), Arg, + VD.getAnyInitializer()->getType().getQualifiers(), + /*IsInitializer*/ true); + CtorCGF.Builder.CreateStore(Fn->arg_begin(), CtorCGF.ReturnValue); + CtorCGF.FinishFunction(); + Ctor = Fn; + } + if (PerformDestroy) { + CodeGenFunction DtorCGF(CGM); + FunctionArgList Args; + ImplicitParamDecl Dst(CGM.getContext(), /*DC*/ nullptr, SourceLocation(), + /*Id*/ nullptr, CGM.getContext().VoidPtrTy); + Args.push_back(&Dst); + + const CGFunctionInfo &FI = CGM.getTypes().arrangeFreeFunctionDeclaration( + CGM.getContext().VoidTy, Args, FunctionType::ExtInfo(), false); + auto FTy = CGM.getTypes().GetFunctionType(FI); + auto Fn = + CGM.CreateGlobalInitOrDestructFunction(FTy, ".__kmpc_global_dtor_."); + DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, + SourceLocation()); + DtorCGF.emitDestroy(Fn->arg_begin(), ASTTy, + DtorCGF.getDestroyer(ASTTy.isDestructedType()), + DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); + DtorCGF.FinishFunction(); + Dtor = Fn; + } + llvm::Type *CCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; + auto CCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CCtorTyArgs, + /*isVarArg*/ false)->getPointerTo(); + CCtor = llvm::Constant::getNullValue(CCtorTy); + if (Ctor == nullptr) { + auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, + /*isVarArg*/ false)->getPointerTo(); + Ctor = llvm::Constant::getNullValue(CtorTy); + } + if (Dtor == nullptr) { + auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, + /*isVarArg*/ false)->getPointerTo(); + Dtor = llvm::Constant::getNullValue(DtorTy); + } + auto InitFunctionTy = llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); + auto InitFunction = CGM.CreateGlobalInitOrDestructFunction( + InitFunctionTy, ".__omp_threadprivate_init_."); + CodeGenFunction InitCGF(CGM); + FunctionArgList ArgList; + InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, + 
CGM.getTypes().arrangeNullaryFunction(), ArgList, Loc); + // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime + // library. + InitCGF.EmitRuntimeCall(CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), + EmitOpenMPUpdateLocation(InitCGF, VD.getLocation())); + // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) + // to register constructor/destructor for variable. + llvm::Value *Args[] = {EmitOpenMPUpdateLocation(InitCGF, VD.getLocation()), + InitCGF.Builder.CreatePointerCast(Addr, CGM.VoidPtrTy), + Ctor, CCtor, Dtor}; + InitCGF.EmitRuntimeCall( + CreateRuntimeFunction( + CGOpenMPRuntime::OMPRTL__kmpc_threadprivate_register), + Args); + InitCGF.FinishFunction(); + return InitFunction; +} Index: lib/CodeGen/CGStmtOpenMP.cpp =================================================================== --- lib/CodeGen/CGStmtOpenMP.cpp +++ lib/CodeGen/CGStmtOpenMP.cpp @@ -35,17 +35,8 @@ OutlinedFn = CGF.GenerateCapturedStmtFunction(*CS); } - // Build call __kmpc_fork_call(loc, 1, microtask, captured_struct/*context*/) - llvm::Value *Args[] = { - CGM.getOpenMPRuntime().EmitOpenMPUpdateLocation(*this, S.getLocStart()), - Builder.getInt32(1), // Number of arguments after 'microtask' argument - // (there is only one additional argument - 'context') - Builder.CreateBitCast(OutlinedFn, - CGM.getOpenMPRuntime().getKmpc_MicroPointerTy()), - EmitCastToVoidPtr(CapturedStruct)}; - llvm::Constant *RTLFn = CGM.getOpenMPRuntime().CreateRuntimeFunction( - CGOpenMPRuntime::OMPRTL__kmpc_fork_call); - EmitRuntimeCall(RTLFn, Args); + CGM.getOpenMPRuntime().EmitOMPParallelCall(*this, S.getLocStart(), OutlinedFn, + CapturedStruct); } void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { Index: lib/CodeGen/CodeGenModule.h =================================================================== --- lib/CodeGen/CodeGenModule.h +++ lib/CodeGen/CodeGenModule.h @@ -660,6 +660,14 @@ return GetAddrOfGlobalVar(cast(GD.getDecl())); } + /// \brief 
Creates global initialization or destructor function. + /// \param FTy Type of the function. + /// \param Name Name of the function. + /// \param TLS Is this function for TLS initialization. + llvm::Function *CreateGlobalInitOrDestructFunction(llvm::FunctionType *FTy, + const Twine &Name, + bool TLS = false); + /// Will return a global variable of the given type. If a variable with a /// different type already exists then a new variable with the right type /// will be created and all uses of the old variable will be replaced with a @@ -1032,7 +1040,16 @@ /// are emitted lazily. void EmitGlobal(GlobalDecl D); + /// \brief Emit a code for threadprivate directive. + /// \param D Threadprivate declaration. + void EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D); + private: + /// \brief Emit a code for threadprivate variable. + /// \param VD Threadprivate variable. + /// \param Loc Location of threadprivate declaration. + void EmitOMPThreadPrivateVarDecl(const VarDecl *VD, SourceLocation Loc); + llvm::GlobalValue *GetGlobalValue(StringRef Ref); llvm::Constant * Index: lib/CodeGen/CodeGenModule.cpp =================================================================== --- lib/CodeGen/CodeGenModule.cpp +++ lib/CodeGen/CodeGenModule.cpp @@ -1409,8 +1409,13 @@ return EmitGlobalFunctionDefinition(GD, GV); } - if (const auto *VD = dyn_cast(D)) - return EmitGlobalVarDefinition(VD); + if (const auto *VD = dyn_cast(D)) { + EmitGlobalVarDefinition(VD); + if (getLangOpts().OpenMP && getOpenMPRuntime().isOMPThreadPrivateDecl(VD)) + EmitOMPThreadPrivateVarDecl( + VD, getOpenMPRuntime().getOMPThreadPrivateDeclLoc(VD)); + return; + } llvm_unreachable("Invalid argument to EmitGlobalDefinition()"); } @@ -3213,6 +3218,10 @@ break; } + case Decl::OMPThreadPrivate: + EmitOMPThreadPrivateDecl(cast(D)); + break; + case Decl::ClassTemplateSpecialization: { const auto *Spec = cast(D); if (DebugInfo && @@ -3383,6 +3392,33 @@ return llvm::ConstantStruct::getAnon(Fields); } +void 
CodeGenModule::EmitOMPThreadPrivateVarDecl(const VarDecl *VD, + SourceLocation Loc) { + if ((VD = VD->getDefinition(Context)) != nullptr) { + QualType ASTTy = VD->getType(); + CXXRecordDecl *RD = ASTTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl(); + bool PerformInit = RD && VD->getAnyInitializer() != nullptr; + bool PerformDestroy = RD && !RD->hasTrivialDestructor(); + auto DeclPtr = VD->isStaticLocal() ? getStaticLocalDeclAddress(VD) + : GetAddrOfGlobalVar(VD); + + if (DeclPtr && (PerformInit || PerformDestroy)) { + auto InitFunction = + getOpenMPRuntime().EmitOMPCXXThreadPrivateInitFunction( + *VD, DeclPtr, PerformInit, PerformDestroy, Loc); + CXXGlobalInits.push_back(InitFunction); + } + } +} + +void CodeGenModule::EmitOMPThreadPrivateDecl(const OMPThreadPrivateDecl *D) { + for (auto RefExpr : D->varlists()) { + const VarDecl *VD = cast(cast(RefExpr)->getDecl()); + EmitOMPThreadPrivateVarDecl(VD, RefExpr->getExprLoc()); + getOpenMPRuntime().addOMPThreadPrivateDecl(VD, RefExpr->getExprLoc()); + } +} + llvm::Constant *CodeGenModule::GetAddrOfRTTIDescriptor(QualType Ty, bool ForEH) { // Return a bogus pointer if RTTI is disabled, unless it's for EH. @@ -3397,4 +3433,3 @@ return getCXXABI().getAddrOfRTTIDescriptor(Ty); } - Index: lib/CodeGen/ModuleBuilder.cpp =================================================================== --- lib/CodeGen/ModuleBuilder.cpp +++ lib/CodeGen/ModuleBuilder.cpp @@ -16,6 +16,7 @@ #include "CodeGenModule.h" #include "clang/AST/ASTContext.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/DeclOpenMP.h" #include "clang/AST/Expr.h" #include "clang/Basic/Diagnostic.h" #include "clang/Basic/TargetInfo.h" @@ -128,6 +129,13 @@ return; Builder->UpdateCompletedType(D); + // In C++, we may have member threadprivate decl that need to be emitted + // at this point. 
+ if (Ctx->getLangOpts().CPlusPlus && !D->isDependentContext()) { + for (auto *M : D->decls()) + if (isa(M)) + Builder->EmitTopLevelDecl(M); + } // For MSVC compatibility, treat declarations of static data members with // inline initializers as definitions. Index: lib/Parse/Parser.cpp =================================================================== --- lib/Parse/Parser.cpp +++ lib/Parse/Parser.cpp @@ -624,8 +624,7 @@ HandlePragmaOpenCLExtension(); return DeclGroupPtrTy(); case tok::annot_pragma_openmp: - ParseOpenMPDeclarativeDirective(); - return DeclGroupPtrTy(); + return ParseOpenMPDeclarativeDirective(); case tok::annot_pragma_ms_pointers_to_members: HandlePragmaMSPointersToMembers(); return DeclGroupPtrTy(); Index: lib/Serialization/ASTReaderDecl.cpp =================================================================== --- lib/Serialization/ASTReaderDecl.cpp +++ lib/Serialization/ASTReaderDecl.cpp @@ -2199,7 +2199,7 @@ if (isa(D) || isa(D) || isa(D) || - isa(D)) + isa(D) || isa(D)) return true; if (VarDecl *Var = dyn_cast(D)) return Var->isFileVarDecl() && Index: test/OpenMP/threadprivate_codegen.cpp =================================================================== --- test/OpenMP/threadprivate_codegen.cpp +++ test/OpenMP/threadprivate_codegen.cpp @@ -0,0 +1,558 @@ +// RUN: %clang_cc1 -verify -DBODY -fopenmp=libiomp5 -triple x86_64-unknown-unknown -x c++ -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp=libiomp5 -x c++ -DBODY -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -g -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix=CHECK-DEBUG %s +// expected-no-diagnostics +#ifndef HEADER +#define HEADER +// CHECK-DAG: [[IDENT:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[S1:%.+]] = type { [[INT:i[0-9]+]] } +// CHECK-DAG: [[S2:%.+]] 
= type { [[INT]], double } +// CHECK-DAG: [[S3:%.+]] = type { [[INT]], float } +// CHECK-DAG: [[S4:%.+]] = type { [[INT]], [[INT]] } +// CHECK-DAG: [[S5:%.+]] = type { [[INT]], [[INT]], [[INT]] } +// CHECK-DAG: [[SMAIN:%.+]] = type { [[INT]], double, double } +// CHECK-DEBUG-DAG: [[IDENT:%.+]] = type { i32, i32, i32, i32, i8* } +// CHECK-DEBUG-DAG: [[S1:%.+]] = type { [[INT:i[0-9]+]] } +// CHECK-DEBUG-DAG: [[S2:%.+]] = type { [[INT]], double } +// CHECK-DEBUG-DAG: [[S3:%.+]] = type { [[INT]], float } +// CHECK-DEBUG-DAG: [[S4:%.+]] = type { [[INT]], [[INT]] } +// CHECK-DEBUG-DAG: [[S5:%.+]] = type { [[INT]], [[INT]], [[INT]] } +// CHECK-DEBUG-DAG: [[SMAIN:%.+]] = type { [[INT]], double, double } + +struct S1 { + int a; + S1() + : a(0) { + } + S1(int a) + : a(a) { + } + S1(const S1 &s) { + a = 12 + s.a; + } + ~S1() { + a = 0; + } +}; + +struct S2 { + int a; + double b; + S2() + : a(0) { + } + S2(int a) + : a(a) { + } + S2(const S2 &s) { + a = 12 + s.a; + } + ~S2() { + a = 0; + } +}; + +struct S3 { + int a; + float b; + S3() + : a(0) { + } + S3(int a) + : a(a) { + } + S3(const S3 &s) { + a = 12 + s.a; + } + ~S3() { + a = 0; + } +}; + +struct S4 { + int a, b; + S4() + : a(0) { + } + S4(int a) + : a(a) { + } + S4(const S4 &s) { + a = 12 + s.a; + } + ~S4() { + a = 0; + } +}; + +struct S5 { + int a, b, c; + S5() + : a(0) { + } + S5(int a) + : a(a) { + } + S5(const S5 &s) { + a = 12 + s.a; + } + ~S5() { + a = 0; + } +}; + +// CHECK: [[CACHE1:@[_.0-9a-zA-Z]+]] = common global i8** null +// CHECK-NEXT: [[GS1:@.+]] = internal global [[S1]] zeroinitializer +// CHECK: [[DEFAULT_LOC:@.+]] = private unnamed_addr constant [[IDENT]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([{{[0-9]+}} x i8]* {{@.+}}, i32 0, i32 0) } +// CHECK-NOT: {{@.+}} = common global i8** null +// CHECK: [[GS2:@.+]] = internal global [[S2]] zeroinitializer +// CHECK: [[CACHE2:@[_.0-9a-zA-Z]+]] = common global i8** null +// CHECK-NEXT: [[ARR_X:@.+]] = global [2 x [3 x [[S1]]]] zeroinitializer 
+// CHECK: [[CACHE3:@[_.0-9a-zA-Z]+]] = common global i8** null +// CHECK-NEXT: [[SM:@.+]] = internal global [[SMAIN]] zeroinitializer +// CHECK: [[CACHE4:@[_.0-9a-zA-Z]+]] = common global i8** null +// CHECK-NEXT: [[STATIC_S:@.+]] = external global [[S3]] +// CHECK: [[CACHE5:@[_.0-9a-zA-Z]+]] = common global i8** null +// CHECK-NEXT: [[GS3:@.+]] = external global [[S5]] +// CHECK: [[CACHE6:@[_.0-9a-zA-Z]+]] = common global i8** null +// CHECK-NEXT: [[ST_INT_ST:@.+]] = linkonce_odr global i32 23 +// CHECK: [[CACHE7:@[_.0-9a-zA-Z]+]] = common global i8** null +// CHECK-NEXT: [[ST_FLOAT_ST:@.+]] = linkonce_odr global float 2.300000e+01 +// CHECK: [[CACHE8:@[_.0-9a-zA-Z]+]] = common global i8** null +// CHECK-NEXT: [[ST_S4_ST:@.+]] = linkonce_odr global %struct.S4 zeroinitializer +// CHECK-NOT: {{@.+}} = common global i8** null +// There is no cache for gs2 because it is not threadprivate. Check that only +// 8 caches are created (for Static::s, gs1, gs3, arr_x, main::sm, ST<int>::st, +// ST<float>::st, ST<S4>::st) +// CHECK-DEBUG-DAG: [[GS1:@.+]] = internal global [[S1]] zeroinitializer +// CHECK-DEBUG-DAG: [[GS2:@.+]] = internal global [[S2]] zeroinitializer +// CHECK-DEBUG-DAG: [[ARR_X:@.+]] = global [2 x [3 x [[S1]]]] zeroinitializer +// CHECK-DEBUG-DAG: [[SM:@.+]] = internal global [[SMAIN]] zeroinitializer +// CHECK-DEBUG-DAG: [[STATIC_S:@.+]] = external global [[S3]] +// CHECK-DEBUG-DAG: [[GS3:@.+]] = external global [[S5]] +// CHECK-DEBUG-DAG: [[ST_INT_ST:@.+]] = linkonce_odr global i32 23 +// CHECK-DEBUG-DAG: [[ST_FLOAT_ST:@.+]] = linkonce_odr global float 2.300000e+01 +// CHECK-DEBUG-DAG: [[ST_S4_ST:@.+]] = linkonce_odr global %struct.S4 zeroinitializer +// CHECK-DEBUG-DAG: [[LOC1:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;155;11;;\00" +// CHECK-DEBUG-DAG: [[LOC2:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;202;4;;\00" +// CHECK-DEBUG-DAG: [[LOC3:@.*]] = private unnamed_addr 
constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;292;19;;\00" +// CHECK-DEBUG-DAG: [[LOC4:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;292;16;;\00" +// CHECK-DEBUG-DAG: [[LOC5:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;326;9;;\00" +// CHECK-DEBUG-DAG: [[LOC6:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;343;10;;\00" +// CHECK-DEBUG-DAG: [[LOC7:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;360;10;;\00" +// CHECK-DEBUG-DAG: [[LOC8:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;386;10;;\00" +// CHECK-DEBUG-DAG: [[LOC9:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;407;10;;\00" +// CHECK-DEBUG-DAG: [[LOC10:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;422;10;;\00" +// CHECK-DEBUG-DAG: [[LOC11:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;439;27;;\00" +// CHECK-DEBUG-DAG: [[LOC12:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;main;456;10;;\00" +// CHECK-DEBUG-DAG: [[LOC13:@.*]] = private unnamed_addr constant [{{[0-9]+}} x i8] c";{{.*}}threadprivate_codegen.cpp;;264;10;;\00" + +struct Static { + static S3 s; +#pragma omp threadprivate(s) +}; + +static S1 gs1(5); +#pragma omp threadprivate(gs1) +// CHECK: define {{.*}} void [[S1_CTOR:@.*]]([[S1]]* %{{.*}}, +// CHECK: define {{.*}} void [[S1_DTOR:@.*]]([[S1]]* %{{.*}}) +// CHECK: define internal i8* [[GS1_CTOR:@\.__kmpc_global_ctor_\..*]](i8*) +// CHECK: [[RES:%.*]] = bitcast i8* %0 to [[S1]]* +// CHECK-NEXT: call void [[S1_CTOR]]([[S1]]* [[RES]], {{.*}} 5) +// CHECK-NEXT: ret i8* %0 +// CHECK-NEXT: } +// CHECK: define internal void 
[[GS1_DTOR:@\.__kmpc_global_dtor_\..*]](i8*) +// CHECK: [[RES:%.*]] = bitcast i8* %0 to [[S1]]* +// CHECK-NEXT: call void [[S1_DTOR]]([[S1]]* [[RES]]) +// CHECK-NEXT: ret void +// CHECK-NEXT: } +// CHECK: define internal void [[GS1_INIT:@\.__omp_threadprivate_init_\..*]]() +// CHECK: call void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i8* (i8*)* [[GS1_CTOR]], i8* (i8*, i8*)* null, void (i8*)* [[GS1_DTOR]]) +// CHECK-NEXT: ret void +// CHECK-NEXT: } +// CHECK-DEBUG: define {{.*}} void [[S1_CTOR:@.*]]([[S1]]* %{{.*}}, +// CHECK-DEBUG: define {{.*}} void [[S1_DTOR:@.*]]([[S1]]* %{{.*}}) +// CHECK-DEBUG: define internal i8* [[GS1_CTOR:@\.__kmpc_global_ctor_\..*]](i8*) +// CHECK-DEBUG: [[RES:%.*]] = bitcast i8* %0 to [[S1]]* +// CHECK-DEBUG-NEXT: call void [[S1_CTOR]]([[S1]]* [[RES]], {{.*}} 5) +// CHECK-DEBUG-NEXT: ret i8* %0 +// CHECK-DEBUG-NEXT: } +// CHECK-DEBUG: define internal void [[GS1_DTOR:@\.__kmpc_global_dtor_\..*]](i8*) +// CHECK-DEBUG: [[RES:%.*]] = bitcast i8* %0 to [[S1]]* +// CHECK-DEBUG-NEXT: call void [[S1_DTOR]]([[S1]]* [[RES]]) +// CHECK-DEBUG-NEXT: ret void +// CHECK-DEBUG-NEXT: } +// CHECK-DEBUG: define internal void [[GS1_INIT:@\.__omp_threadprivate_init_\..*]]() +// CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] +// CHECK-DEBUG: @__kmpc_global_thread_num +// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 +// CHECK-DEBUG: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC1]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// CHECK-DEBUG: call void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i8* (i8*)* [[GS1_CTOR]], i8* (i8*, i8*)* null, void (i8*)* [[GS1_DTOR]]) +// CHECK-DEBUG-NEXT: ret void +// CHECK-DEBUG-NEXT: } +static S2 gs2(27); +// CHECK: define {{.*}} void [[S2_CTOR:@.*]]([[S2]]* %{{.*}}, +// CHECK: define {{.*}} void [[S2_DTOR:@.*]]([[S2]]* 
%{{.*}}) +// No other call to the S2 constructor is expected because gs2 is not threadprivate +// CHECK-NOT: call void [[S2_CTOR]]([[S2]]* +// CHECK-DEBUG: define {{.*}} void [[S2_CTOR:@.*]]([[S2]]* %{{.*}}, +// CHECK-DEBUG: define {{.*}} void [[S2_DTOR:@.*]]([[S2]]* %{{.*}}) +// No other call to the S2 constructor is expected because gs2 is not threadprivate +// CHECK-DEBUG-NOT: call void [[S2_CTOR]]([[S2]]* +S1 arr_x[2][3] = { { 1, 2, 3 }, { 4, 5, 6 } }; +#pragma omp threadprivate(arr_x) +// CHECK: define {{.*}} i8* [[ARR_X_CTOR:@\.__kmpc_global_ctor_\..*]](i8*) +// CHECK: [[RES:%.*]] = bitcast i8* %0 to [2 x [3 x [[S1]]]]* +// CHECK: [[ARR1:%.*]] = getelementptr inbounds [2 x [3 x [[S1]]]]* [[RES]], i{{.*}} 0, i{{.*}} 0 +// CHECK: [[ARR:%.*]] = getelementptr inbounds [3 x [[S1]]]* [[ARR1]], i{{.*}} 0, i{{.*}} 0 +// CHECK: invoke void [[S1_CTOR]]([[S1]]* [[ARR]], [[INT]] 1) +// CHECK: [[ARR_ELEMENT:%.*]] = getelementptr inbounds [[S1]]* [[ARR]], i{{.*}} 1 +// CHECK: invoke void [[S1_CTOR]]([[S1]]* [[ARR_ELEMENT]], [[INT]] 2) +// CHECK: [[ARR_ELEMENT2:%.*]] = getelementptr inbounds [[S1]]* [[ARR_ELEMENT]], i{{.*}} 1 +// CHECK: invoke void [[S1_CTOR]]([[S1]]* [[ARR_ELEMENT2]], [[INT]] 3) +// CHECK: [[ARR_ELEMENT3:%.*]] = getelementptr inbounds [3 x [[S1]]]* [[ARR1]], i{{.*}} 1 +// CHECK: [[ARR_:%.*]] = getelementptr inbounds [3 x [[S1]]]* [[ARR_ELEMENT3]], i{{.*}} 0, i{{.*}} 0 +// CHECK: invoke void [[S1_CTOR]]([[S1]]* [[ARR_]], [[INT]] 4) +// CHECK: [[ARR_ELEMENT:%.*]] = getelementptr inbounds [[S1]]* [[ARR_]], i{{.*}} 1 +// CHECK: invoke void [[S1_CTOR]]([[S1]]* [[ARR_ELEMENT]], [[INT]] 5) +// CHECK: [[ARR_ELEMENT2:%.*]] = getelementptr inbounds [[S1]]* [[ARR_ELEMENT]], i{{.*}} 1 +// CHECK: invoke void [[S1_CTOR]]([[S1]]* [[ARR_ELEMENT2]], [[INT]] 6) +// CHECK: ret i8* %0 +// CHECK: } +// CHECK: define {{.*}} void [[ARR_X_DTOR:@\.__kmpc_global_dtor_\..*]](i8*) +// CHECK: [[ARR_BEGIN:%.*]] = bitcast i8* %0 to [[S1]]* +// CHECK-NEXT: [[ARR_CUR:%.*]] = getelementptr inbounds [[S1]]* 
[[ARR_BEGIN]], i{{.*}} 6 +// CHECK-NEXT: br label %[[ARR_LOOP:.*]] +// CHECK: {{.*}}[[ARR_LOOP]]{{.*}} +// CHECK-NEXT: [[ARR_ELEMENTPAST:%.*]] = phi [[S1]]* [ [[ARR_CUR]], {{.*}} ], [ [[ARR_ELEMENT:%.*]], {{.*}} ] +// CHECK-NEXT: [[ARR_ELEMENT:%.*]] = getelementptr inbounds [[S1]]* [[ARR_ELEMENTPAST]], i{{.*}} -1 +// CHECK-NEXT: invoke void [[S1_DTOR]]([[S1]]* [[ARR_ELEMENT]]) +// CHECK: [[ARR_DONE:%.*]] = icmp eq [[S1]]* [[ARR_ELEMENT]], [[ARR_BEGIN]] +// CHECK-NEXT: br i1 [[ARR_DONE]], label %[[ARR_EXIT:.*]], label %[[ARR_LOOP]] +// CHECK: {{.*}}[[ARR_EXIT]]{{.*}} +// CHECK-NEXT: ret void +// CHECK: } +// CHECK: define {{.*}} void [[ARR_X_INIT:@\.__omp_threadprivate_init_\..*]]() +// CHECK: call void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i8* (i8*)* [[ARR_X_CTOR]], i8* (i8*, i8*)* null, void (i8*)* [[ARR_X_DTOR]]) +// CHECK-NEXT: ret void +// CHECK-NEXT: } +// CHECK-DEBUG: define {{.*}} i8* [[ARR_X_CTOR:@\.__kmpc_global_ctor_\..*]](i8*) +// CHECK-DEBUG: } +// CHECK-DEBUG: define {{.*}} void [[ARR_X_DTOR:@\.__kmpc_global_dtor_\..*]](i8*) +// CHECK-DEBUG: } +// CHECK-DEBUG: define {{.*}} void [[ARR_X_INIT:@\.__omp_threadprivate_init_\..*]]() +// CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] +// CHECK-DEBUG: @__kmpc_global_thread_num +// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 +// CHECK-DEBUG: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC2]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// CHECK-DEBUG: call void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i8* (i8*)* [[ARR_X_CTOR]], i8* (i8*, i8*)* null, void (i8*)* [[ARR_X_DTOR]]) +// CHECK-DEBUG-NEXT: ret void +// CHECK-DEBUG-NEXT: } +extern S5 gs3; +#pragma omp threadprivate(gs3) +// No call to the S5 constructor is expected because gs3 is only declared here, not defined. 
+// CHECK-NOT: call void {{.*}}([[S5]]* +// CHECK-DEBUG-NOT: call void {{.*}}([[S5]]* + +template +struct ST { + static T st; +#pragma omp threadprivate(st) +}; + +template +T ST::st(23); + +#endif + +#ifdef BODY + +// CHECK: define i32 @main() +// CHECK-DEBUG: define i32 @main() +int main() { + // CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] + int Res; + struct Smain { + int a; + double b, c; + Smain() + : a(0) { + } + Smain(int a) + : a(a) { + } + Smain(const Smain &s) { + a = 12 + s.a; + } + ~Smain() { + a = 0; + } + }; + + static Smain sm(gs1.a); +// CHECK: [[THREAD_NUM:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT]]* [[DEFAULT_LOC]]) +// CHECK: call i{{.*}} @__cxa_guard_acquire +// CHECK: [[GS1_TEMP_ADDR:%.*]] = call i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[CACHE1]]) +// CHECK-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* +// CHECK-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 +// CHECK-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]] +// CHECK-NEXT: invoke void [[SMAIN_CTOR:.*]]([[SMAIN]]* [[SM]], [[INT]] [[GS1_A]]) +// CHECK: call void @__cxa_guard_release +// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 +// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC3]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// CHECK-DEBUG-NEXT: [[THREAD_NUM:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT]]* [[KMPC_LOC_ADDR]]) +// CHECK-DEBUG: call i{{.*}} @__cxa_guard_acquire +// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 +// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC3]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// CHECK-DEBUG: [[GS1_TEMP_ADDR:%.*]] = call i8* 
@__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** +// CHECK-DEBUG-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* +// CHECK-DEBUG-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 +// CHECK-DEBUG-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]] +// CHECK-DEBUG-NEXT: invoke void [[SMAIN_CTOR:.*]]([[SMAIN]]* [[SM]], [[INT]] [[GS1_A]]) +// CHECK-DEBUG: call void @__cxa_guard_release +#pragma omp threadprivate(sm) + // CHECK: [[STATIC_S_TEMP_ADDR:%.*]] = call i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S3]]* [[STATIC_S]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[CACHE4]]) + // CHECK-NEXT: [[STATIC_S_ADDR:%.*]] = bitcast i8* [[STATIC_S_TEMP_ADDR]] to [[S3]]* + // CHECK-NEXT: [[STATIC_S_A_ADDR:%.*]] = getelementptr inbounds [[S3]]* [[STATIC_S_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-NEXT: [[STATIC_S_A:%.*]] = load [[INT]]* [[STATIC_S_A_ADDR]] + // CHECK-NEXT: store [[INT]] [[STATIC_S_A]], [[INT]]* [[RES_ADDR:[^,]+]] + // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 + // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC5]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + // CHECK-DEBUG-NEXT: [[STATIC_S_TEMP_ADDR:%.*]] = call i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S3]]* [[STATIC_S]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[STATIC_S_ADDR:%.*]] = bitcast i8* [[STATIC_S_TEMP_ADDR]] to [[S3]]* + // CHECK-DEBUG-NEXT: [[STATIC_S_A_ADDR:%.*]] = getelementptr inbounds [[S3]]* [[STATIC_S_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-DEBUG-NEXT: [[STATIC_S_A:%.*]] = load [[INT]]* [[STATIC_S_A_ADDR]] + // CHECK-DEBUG-NEXT: store [[INT]] [[STATIC_S_A]], [[INT]]* [[RES_ADDR:[^,]+]] + Res = Static::s.a; + // CHECK: 
[[SM_TEMP_ADDR:%.*]] = call i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[CACHE3]]) + // CHECK-NEXT: [[SM_ADDR:%.*]] = bitcast i8* [[SM_TEMP_ADDR]] to [[SMAIN]]* + // CHECK-NEXT: [[SM_A_ADDR:%.*]] = getelementptr inbounds [[SMAIN]]* [[SM_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-NEXT: [[SM_A:%.*]] = load [[INT]]* [[SM_A_ADDR]] + // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[SM_A]] + // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 + // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC6]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + // CHECK-DEBUG-NEXT: [[SM_TEMP_ADDR:%.*]] = call i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[SM_ADDR:%.*]] = bitcast i8* [[SM_TEMP_ADDR]] to [[SMAIN]]* + // CHECK-DEBUG-NEXT: [[SM_A_ADDR:%.*]] = getelementptr inbounds [[SMAIN]]* [[SM_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-DEBUG-NEXT: [[SM_A:%.*]] = load [[INT]]* [[SM_A_ADDR]] + // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[SM_A]] + // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + Res += sm.a; + // CHECK: [[GS1_TEMP_ADDR:%.*]] = call i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[CACHE1]]) + // CHECK-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* + // CHECK-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]] + 
// CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS1_A]] + // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 + // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC7]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + // CHECK-DEBUG-NEXT: [[GS1_TEMP_ADDR:%.*]] = call i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* + // CHECK-DEBUG-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-DEBUG-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]] + // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS1_A]] + // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + Res += gs1.a; + // CHECK: [[GS2_A:%.*]] = load [[INT]]* getelementptr inbounds ([[S2]]* [[GS2]], i{{.*}} 0, i{{.*}} 0) + // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS2_A]] + // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-DEBUG: [[GS2_A:%.*]] = load [[INT]]* getelementptr inbounds ([[S2]]* [[GS2]], i{{.*}} 0, i{{.*}} 0) + // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS2_A]] + // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + Res += gs2.a; + // CHECK: [[GS3_TEMP_ADDR:%.*]] = call i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[CACHE5]]) + // CHECK-NEXT: [[GS3_ADDR:%.*]] = bitcast i8* 
[[GS3_TEMP_ADDR]] to [[S5]]* + // CHECK-NEXT: [[GS3_A_ADDR:%.*]] = getelementptr inbounds [[S5]]* [[GS3_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-NEXT: [[GS3_A:%.*]] = load [[INT]]* [[GS3_A_ADDR]] + // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS3_A]] + // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 + // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC8]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + // CHECK-DEBUG-NEXT: [[GS3_TEMP_ADDR:%.*]] = call i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S5]]* [[GS3]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[GS3_ADDR:%.*]] = bitcast i8* [[GS3_TEMP_ADDR]] to [[S5]]* + // CHECK-DEBUG-NEXT: [[GS3_A_ADDR:%.*]] = getelementptr inbounds [[S5]]* [[GS3_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-DEBUG-NEXT: [[GS3_A:%.*]] = load [[INT]]* [[GS3_A_ADDR]] + // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[GS3_A]] + // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + Res += gs3.a; + // CHECK: [[ARR_X_TEMP_ADDR:%.*]] = call i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[CACHE2]]) + // CHECK-NEXT: [[ARR_X_ADDR:%.*]] = bitcast i8* [[ARR_X_TEMP_ADDR]] to [2 x [3 x [[S1]]]]* + // CHECK-NEXT: [[ARR_X_1_ADDR:%.*]] = getelementptr inbounds [2 x [3 x [[S1]]]]* [[ARR_X_ADDR]], i{{.*}} 0, i{{.*}} 1 + // CHECK-NEXT: [[ARR_X_1_1_ADDR:%.*]] = getelementptr inbounds [3 x [[S1]]]* [[ARR_X_1_ADDR]], i{{.*}} 0, i{{.*}} 1 + // CHECK-NEXT: [[ARR_X_1_1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[ARR_X_1_1_ADDR]], i{{.*}} 0, i{{.*}} 0 + // 
CHECK-NEXT: [[ARR_X_1_1_A:%.*]] = load [[INT]]* [[ARR_X_1_1_A_ADDR]] + // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]] + // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 + // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC9]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + // CHECK-DEBUG-NEXT: [[ARR_X_TEMP_ADDR:%.*]] = call i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([2 x [3 x [[S1]]]]* [[ARR_X]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[ARR_X_ADDR:%.*]] = bitcast i8* [[ARR_X_TEMP_ADDR]] to [2 x [3 x [[S1]]]]* + // CHECK-DEBUG-NEXT: [[ARR_X_1_ADDR:%.*]] = getelementptr inbounds [2 x [3 x [[S1]]]]* [[ARR_X_ADDR]], i{{.*}} 0, i{{.*}} 1 + // CHECK-DEBUG-NEXT: [[ARR_X_1_1_ADDR:%.*]] = getelementptr inbounds [3 x [[S1]]]* [[ARR_X_1_ADDR]], i{{.*}} 0, i{{.*}} 1 + // CHECK-DEBUG-NEXT: [[ARR_X_1_1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[ARR_X_1_1_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-DEBUG-NEXT: [[ARR_X_1_1_A:%.*]] = load [[INT]]* [[ARR_X_1_1_A_ADDR]] + // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ARR_X_1_1_A]] + // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + Res += arr_x[1][1].a; + // CHECK: [[ST_INT_ST_TEMP_ADDR:%.*]] = call i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[CACHE6]]) + // CHECK-NEXT: [[ST_INT_ST_ADDR:%.*]] = bitcast i8* [[ST_INT_ST_TEMP_ADDR]] to [[INT]]* + // CHECK-NEXT: [[ST_INT_ST_VAL:%.*]] = load [[INT]]* [[ST_INT_ST_ADDR]] + // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} 
[[INT]] [[RES]], [[ST_INT_ST_VAL]] + // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 + // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC10]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + // CHECK-DEBUG-NEXT: [[ST_INT_ST_TEMP_ADDR:%.*]] = call i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[INT]]* [[ST_INT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[ST_INT_ST_ADDR:%.*]] = bitcast i8* [[ST_INT_ST_TEMP_ADDR]] to [[INT]]* + // CHECK-DEBUG-NEXT: [[ST_INT_ST_VAL:%.*]] = load [[INT]]* [[ST_INT_ST_ADDR]] + // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_INT_ST_VAL]] + // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + Res += ST::st; + // CHECK: [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[CACHE7]]) + // CHECK-NEXT: [[ST_FLOAT_ST_ADDR:%.*]] = bitcast i8* [[ST_FLOAT_ST_TEMP_ADDR]] to float* + // CHECK-NEXT: [[ST_FLOAT_ST_VAL:%.*]] = load float* [[ST_FLOAT_ST_ADDR]] + // CHECK-NEXT: [[FLOAT_TO_INT_CONV:%.*]] = fptosi float [[ST_FLOAT_ST_VAL]] to [[INT]] + // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]] + // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 + // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC11]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_TEMP_ADDR:%.*]] = call i8* 
@__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast (float* [[ST_FLOAT_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_ADDR:%.*]] = bitcast i8* [[ST_FLOAT_ST_TEMP_ADDR]] to float* + // CHECK-DEBUG-NEXT: [[ST_FLOAT_ST_VAL:%.*]] = load float* [[ST_FLOAT_ST_ADDR]] + // CHECK-DEBUG-NEXT: [[FLOAT_TO_INT_CONV:%.*]] = fptosi float [[ST_FLOAT_ST_VAL]] to [[INT]] + // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[FLOAT_TO_INT_CONV]] + // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + Res += static_cast(ST::st); + // CHECK: [[ST_S4_ST_TEMP_ADDR:%.*]] = call i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[CACHE8]]) + // CHECK-NEXT: [[ST_S4_ST_ADDR:%.*]] = bitcast i8* [[ST_S4_ST_TEMP_ADDR]] to [[S4]]* + // CHECK-NEXT: [[ST_S4_ST_A_ADDR:%.*]] = getelementptr inbounds [[S4]]* [[ST_S4_ST_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-NEXT: [[ST_S4_ST_A:%.*]] = load [[INT]]* [[ST_S4_ST_A_ADDR]] + // CHECK-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_S4_ST_A]] + // CHECK-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + // CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 + // CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC12]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] + // CHECK-DEBUG-NEXT: [[ST_S4_ST_TEMP_ADDR:%.*]] = call i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i{{.*}} {{[0-9]+}}, i8*** + // CHECK-DEBUG-NEXT: [[ST_S4_ST_ADDR:%.*]] = bitcast i8* [[ST_S4_ST_TEMP_ADDR]] to [[S4]]* + // CHECK-DEBUG-NEXT: [[ST_S4_ST_A_ADDR:%.*]] = getelementptr inbounds [[S4]]* 
[[ST_S4_ST_ADDR]], i{{.*}} 0, i{{.*}} 0 + // CHECK-DEBUG-NEXT: [[ST_S4_ST_A:%.*]] = load [[INT]]* [[ST_S4_ST_A_ADDR]] + // CHECK-DEBUG-NEXT: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-DEBUG-NEXT: [[ADD:%.*]] = add {{.*}} [[INT]] [[RES]], [[ST_S4_ST_A]] + // CHECK-DEBUG-NEXT: store [[INT]] [[ADD]], [[INT]]* [[RES:.+]] + Res += ST::st.a; + // CHECK: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-NEXT: ret [[INT]] [[RES]] + // CHECK-DEBUG: [[RES:%.*]] = load [[INT]]* [[RES_ADDR]] + // CHECK-DEBUG-NEXT: ret [[INT]] [[RES]] + return Res; +} +// CHECK: } + +// CHECK: define {{.*}} void [[SMAIN_CTOR]]([[SMAIN]]* %{{.*}}, +// CHECK: define {{.*}} void [[SMAIN_DTOR:@.*]]([[SMAIN]]* %{{.*}}) +// CHECK: define internal i8* [[SM_CTOR:@\.__kmpc_global_ctor_\..*]](i8*) +// CHECK: [[THREAD_NUM:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT]]* [[DEFAULT_LOC]]) +// CHECK: [[RES:%.*]] = bitcast i8* %0 to [[SMAIN]]* +// CHECK: [[GS1_TEMP_ADDR:%.*]] = call i8* @__kmpc_threadprivate_cached([[IDENT]]* [[DEFAULT_LOC]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** [[CACHE1]]) +// CHECK-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* +// CHECK-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 +// CHECK-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]] +// CHECK-NEXT: call void [[SMAIN_CTOR]]([[SMAIN]]* [[RES]], [[INT]] [[GS1_A]]) +// CHECK-NEXT: ret i8* %0 +// CHECK-NEXT: } +// CHECK: define internal void [[SM_DTOR:@\.__kmpc_global_dtor_\..*]](i8*) +// CHECK: [[RES:%.*]] = bitcast i8* %0 to [[SMAIN]]* +// CHECK-NEXT: call void [[SMAIN_DTOR]]([[SMAIN]]* [[RES]]) +// CHECK-NEXT: ret void +// CHECK-NEXT: } +// CHECK: define internal void [[SM_INIT:@\.__omp_threadprivate_init_\..*]]() +// CHECK: call void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i8* (i8*)* [[SM_CTOR]], i8* (i8*, i8*)* null, void (i8*)* [[SM_DTOR]]) +// 
CHECK-NEXT: ret void +// CHECK-NEXT: } +// CHECK-DEBUG: define {{.*}} void [[SMAIN_CTOR]]([[SMAIN]]* %{{.*}}, +// CHECK-DEBUG: define {{.*}} void [[SMAIN_DTOR:@.*]]([[SMAIN]]* %{{.*}}) +// CHECK-DEBUG: define internal i8* [[SM_CTOR:@\.__kmpc_global_ctor_\..*]](i8*) +// CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] +// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 +// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC3]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// CHECK-DEBUG-NEXT: [[THREAD_NUM:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT]]* [[KMPC_LOC_ADDR]]) +// CHECK-DEBUG: [[RES:%.*]] = bitcast i8* %0 to [[SMAIN]]* +// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, i{{.*}} 4 +// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC3]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// CHECK-DEBUG: [[GS1_TEMP_ADDR:%.*]] = call i8* @__kmpc_threadprivate_cached([[IDENT]]* [[KMPC_LOC_ADDR]], i32 [[THREAD_NUM]], i8* bitcast ([[S1]]* [[GS1]] to i8*), i{{.*}} {{[0-9]+}}, i8*** +// CHECK-DEBUG-NEXT: [[GS1_ADDR:%.*]] = bitcast i8* [[GS1_TEMP_ADDR]] to [[S1]]* +// CHECK-DEBUG-NEXT: [[GS1_A_ADDR:%.*]] = getelementptr inbounds [[S1]]* [[GS1_ADDR]], i{{.*}} 0, i{{.*}} 0 +// CHECK-DEBUG-NEXT: [[GS1_A:%.*]] = load [[INT]]* [[GS1_A_ADDR]] +// CHECK-DEBUG-NEXT: call void [[SMAIN_CTOR]]([[SMAIN]]* [[RES]], [[INT]] [[GS1_A]]) +// CHECK-DEBUG-NEXT: ret i8* %0 +// CHECK-DEBUG-NEXT: } +// CHECK-DEBUG: define internal void [[SM_DTOR:@\.__kmpc_global_dtor_\..*]](i8*) +// CHECK-DEBUG: } +// CHECK-DEBUG: define internal void [[SM_INIT:@\.__omp_threadprivate_init_\..*]]() +// CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] +// CHECK-DEBUG: @__kmpc_global_thread_num +// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 
0, i{{.*}} 4 +// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC4]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// CHECK-DEBUG: call void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([[SMAIN]]* [[SM]] to i8*), i8* (i8*)* [[SM_CTOR]], i8* (i8*, i8*)* null, void (i8*)* [[SM_DTOR]]) +// CHECK-DEBUG-NEXT: ret void +// CHECK-DEBUG-NEXT: } + +// CHECK: define {{.*}} void [[S4_CTOR:@.*]]([[S4]]* %{{.*}}, +// CHECK: define {{.*}} void [[S4_DTOR:@.*]]([[S4]]* %{{.*}}) +// CHECK: define internal i8* [[ST_S4_ST_CTOR:@\.__kmpc_global_ctor_\..*]](i8*) +// CHECK: [[RES:%.*]] = bitcast i8* %0 to [[S4]]* +// CHECK-NEXT: call void [[S4_CTOR]]([[S4]]* [[RES]], {{.*}} 23) +// CHECK-NEXT: ret i8* %0 +// CHECK-NEXT: } +// CHECK: define internal void [[ST_S4_ST_DTOR:@\.__kmpc_global_dtor_\..*]](i8*) +// CHECK: [[RES:%.*]] = bitcast i8* %0 to [[S4]]* +// CHECK-NEXT: call void [[S4_DTOR]]([[S4]]* [[RES]]) +// CHECK-NEXT: ret void +// CHECK-NEXT: } +// CHECK: define internal void [[ST_S4_ST_INIT:@\.__omp_threadprivate_init_\..*]]() +// CHECK: call void @__kmpc_threadprivate_register([[IDENT]]* [[DEFAULT_LOC]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i8* (i8*)* [[ST_S4_ST_CTOR]], i8* (i8*, i8*)* null, void (i8*)* [[ST_S4_ST_DTOR]]) +// CHECK-NEXT: ret void +// CHECK-NEXT: } +// CHECK-DEBUG: define {{.*}} void [[S4_CTOR:@.*]]([[S4]]* %{{.*}}, +// CHECK-DEBUG: define {{.*}} void [[S4_DTOR:@.*]]([[S4]]* %{{.*}}) +// CHECK-DEBUG: define internal i8* [[ST_S4_ST_CTOR:@\.__kmpc_global_ctor_\..*]](i8*) +// CHECK-DEBUG: } +// CHECK-DEBUG: define internal void [[ST_S4_ST_DTOR:@\.__kmpc_global_dtor_\..*]](i8*) +// CHECK-DEBUG: } +// CHECK-DEBUG: define internal void [[ST_S4_ST_INIT:@\.__omp_threadprivate_init_\..*]]() +// CHECK-DEBUG: [[KMPC_LOC_ADDR:%.*]] = alloca [[IDENT]] +// CHECK-DEBUG: @__kmpc_global_thread_num +// CHECK-DEBUG: [[KMPC_LOC_ADDR_PSOURCE:%.*]] = getelementptr inbounds [[IDENT]]* [[KMPC_LOC_ADDR]], i{{.*}} 0, 
i{{.*}} 4 +// CHECK-DEBUG-NEXT: store i8* getelementptr inbounds ([{{.*}} x i8]* [[LOC13]], i{{.*}} 0, i{{.*}} 0), i8** [[KMPC_LOC_ADDR_PSOURCE]] +// CHECK-DEBUG: call void @__kmpc_threadprivate_register([[IDENT]]* [[KMPC_LOC_ADDR]], i8* bitcast ([[S4]]* [[ST_S4_ST]] to i8*), i8* (i8*)* [[ST_S4_ST_CTOR]], i8* (i8*, i8*)* null, void (i8*)* [[ST_S4_ST_DTOR]]) +// CHECK-DEBUG-NEXT: ret void +// CHECK-DEBUG-NEXT: } + +// CHECK: define internal void {{@.*}}() +// CHECK-DAG: call void [[GS1_INIT]]() +// CHECK-DAG: call void [[ARR_X_INIT]]() +// CHECK-DAG: call void [[SM_INIT]]() +// CHECK-DAG: call void [[ST_S4_ST_INIT]]() +// CHECK: ret void +// CHECK-DEBUG: define internal void {{@.*}}() +// CHECK-DEBUG-DAG: call void [[GS1_INIT]]() +// CHECK-DEBUG-DAG: call void [[ARR_X_INIT]]() +// CHECK-DEBUG-DAG: call void [[SM_INIT]]() +// CHECK-DEBUG-DAG: call void [[ST_S4_ST_INIT]]() +// CHECK-DEBUG: ret void +#endif +