diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -2614,7 +2614,12 @@ *VD, CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false)), getContext().getDeclAlign(VD)); - // No other cases for now. + // Add VD to ForeignStaticTLSVars if it is a TLS_Dynamic thread-local variable + // whose DeclContext differs from CurGD's declaration. + const Decl *DC = cast(VD->getDeclContext()); + if (DC != CurGD.getDecl() && VD->getTLSKind() == VarDecl::TLS_Dynamic) + ForeignStaticTLSVars.insert(VD); + // No other cases for now. } else { llvm_unreachable("DeclRefExpr for Decl not entered in LocalDeclMap?"); } diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -467,6 +467,10 @@ /// should emit cleanups. bool CurFuncIsThunk = false; + /// Static thread-local variables referenced by this function that were + /// declared in a different (e.g. parent) function. + llvm::SmallSet ForeignStaticTLSVars; + /// In ARC, whether we should autorelease the return value.
bool AutoreleaseResult = false; diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -31,12 +31,16 @@ #include "clang/Basic/TargetInfo.h" #include "clang/CodeGen/CGFunctionInfo.h" #include "clang/Frontend/FrontendDiagnostic.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/ValueSymbolTable.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" using namespace clang; using namespace CodeGen; @@ -384,6 +388,64 @@ CGBuilderTy(*this, AllocaInsertPt).CreateCall(FrameEscapeFn, EscapeArgs); } + // Emit initializers for thread-local static locals that this function + // references but that are declared in another function; such a variable may + // be uninitialized on entry if this function runs on a different thread. + // For example, when we're emitting the lambda in the following code: + // + // class Object { + // int init; + // Object() : init(1) {} + // }; + // + // main() { + // static thread_local Object var; + // std::thread([] { + // ...emit initializer for var here... + // }); + // } + // + // or another example: + // + // main() { + // static Object var; + // #pragma omp threadprivate(var) + // #pragma omp parallel + // { + // ...emit initializer for var here... + // } + // } + for (const VarDecl *VD : ForeignStaticTLSVars) { + assert(VD->isStaticLocal() && "expected a static local VarDecl"); + + // CUDA's local and local static __shared__ variables should not + // have any non-empty initializers. This is ensured by Sema. + // Whatever initializer such variable may have when it gets here is + // a no-op and should not be emitted.
+ bool isCudaSharedVar = getLangOpts().CUDA && getLangOpts().CUDAIsDevice && + VD->hasAttr<CUDASharedAttr>(); + // If this thread-local variable has an initializer, emit it. + if (VD->getInit() && !isCudaSharedVar) { + auto *GV = cast<llvm::GlobalVariable>(CGM.getStaticLocalDeclAddress(VD)); + auto IP = Builder.saveAndClearIP(); + llvm::BasicBlock *BBParent = AllocaInsertPt->getParent(); + llvm::Instruction *INext = AllocaInsertPt->getNextNonDebugInstruction(); + llvm::BasicBlock *BBNext = BBParent->splitBasicBlock(INext, BBParent->getName() + ".next"); + // The split ended BBParent with a branch to BBNext; re-fetch INext as that branch. + INext = AllocaInsertPt->getNextNonDebugInstruction(); + // Append after that branch; it is replaced below if anything was emitted. + Builder.SetInsertPoint(BBParent); + // The address is cast<> (not dyn_cast<>) above because the global should already + // exist and shouldn't change: this was called first for the parent function. + AddInitializerToStaticVarDecl(*VD, GV); + if (INext != BBParent->getTerminator()) { + INext->eraseFromParent(); + Builder.CreateBr(BBNext); + } + Builder.restoreIP(IP); + } + } + // Remove the AllocaInsertPt instruction, which is just a convenience for us.
llvm::Instruction *Ptr = AllocaInsertPt; AllocaInsertPt = nullptr; diff --git a/clang/test/CodeGenCXX/cxx11-thread-local.cpp b/clang/test/CodeGenCXX/cxx11-thread-local.cpp --- a/clang/test/CodeGenCXX/cxx11-thread-local.cpp +++ b/clang/test/CodeGenCXX/cxx11-thread-local.cpp @@ -268,15 +268,37 @@ return this->n; } +namespace static_tls_in_lambda { + struct X { + X() {} + };
+ // The lambda below reads f()'s static thread_local 'x' from outside f();
+ // the checks for its call operator expect a guarded initialization of 'x'.
+ X (*f())() { + static thread_local X x; + + return [] { + return x; + }; + } + + auto y = f(); + + void g() { y(); } +} + namespace { thread_local int anon_i{1}; } void set_anon_i() { anon_i = 2; } + + // LINUX-LABEL: define internal i32* @_ZTWN12_GLOBAL__N_16anon_iE() // DARWIN-LABEL: define internal cxx_fast_tlscc i32* @_ZTWN12_GLOBAL__N_16anon_iE() + // LINUX: define internal void @[[V_M_INIT]]() // DARWIN: define internal cxx_fast_tlscc void @[[V_M_INIT]]() // LINUX-SAME: comdat($_ZN1VIiE1mE) @@ -290,6 +312,8 @@ // CHECK: store i64 1, i64* @_ZGVN1VIiE1mE // CHECK: br label + + // LINUX: define internal void @[[X_M_INIT]]() // DARWIN: define internal cxx_fast_tlscc void @[[X_M_INIT]]() // LINUX-SAME: comdat($_ZN1XIiE1mE) @@ -303,6 +327,14 @@ // CHECK: store i64 1, i64* @_ZGVN1XIiE1mE // CHECK: br label +// CHECK: define internal void @"_ZZN20static_tls_in_lambda1fEvENK3$_1clEv" +// CHECK: %[[static_tls_guard_val:.*]] = load i8, i8* @_ZGVZN20static_tls_in_lambda1fEvE1x +// CHECK: %[[static_tls_guard_init:.*]] = icmp eq i8 %[[static_tls_guard_val]], 0 +// CHECK: br i1 %[[static_tls_guard_init]], +// CHECK: call void @_ZN20static_tls_in_lambda1XC1Ev(%"struct.static_tls_in_lambda::X"* @_ZZN20static_tls_in_lambda1fEvE1x) +// CHECK: store i8 1, i8* @_ZGVZN20static_tls_in_lambda1fEvE1x, align 1 + + // CHECK: define {{.*}}@[[GLOBAL_INIT:.*]]() // CHECK: call void @[[C_INIT]]() // CHECK: call void @[[E_INIT]]()