diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -2518,7 +2518,8 @@ const Expr *E, const VarDecl *VD) { QualType T = E->getType(); - // If it's thread_local, emit a call to its wrapper function instead. + // If it's a dynamic thread_local, and the ABI requires a wrapper function, + // emit a call to its wrapper function instead. if (VD->getTLSKind() == VarDecl::TLS_Dynamic && CGF.CGM.getCXXABI().usesThreadWrapperFunction(VD)) return CGF.CGM.getCXXABI().EmitThreadLocalVarDeclLValue(CGF, VD, T); @@ -2530,15 +2531,20 @@ return CGF.MakeAddrLValue(Addr, T, AlignmentSource::Decl); } + bool ShouldEmitPrivateCopy = CGF.getLangOpts().OpenMP && + !CGF.getLangOpts().OpenMPSimd && + VD->hasAttr<OMPThreadPrivateDeclAttr>(); llvm::Value *V = CGF.CGM.GetAddrOfGlobalVar(VD); + if (VD->getTLSKind() != VarDecl::TLS_None && !ShouldEmitPrivateCopy && + CGF.getLangOpts().Coroutines) + V = CGF.Builder.CreateThreadLocal(V); llvm::Type *RealVarTy = CGF.getTypes().ConvertTypeForMem(VD->getType()); V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy); CharUnits Alignment = CGF.getContext().getDeclAlign(VD); Address Addr(V, Alignment); // Emit reference to the private copy of the variable if it is an OpenMP // threadprivate variable. - if (CGF.getLangOpts().OpenMP && !CGF.getLangOpts().OpenMPSimd && - VD->hasAttr<OMPThreadPrivateDeclAttr>()) { + if (ShouldEmitPrivateCopy) { return EmitThreadPrivateVarDeclLValue(CGF, VD, T, Addr, RealVarTy, E->getExprLoc()); } diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -2915,9 +2915,11 @@ Builder.SetInsertPoint(ExitBB); } + llvm::Value *Val = Var; + if (CGM.getLangOpts().Coroutines) + Val = Builder.CreateThreadLocal(Val); // For a reference, the result of the wrapper function is a pointer to // the referenced object. 
- llvm::Value *Val = Var; if (VD->getType()->isReferenceType()) { CharUnits Align = CGM.getContext().getDeclAlign(VD); Val = Builder.CreateAlignedLoad(Val, Align); diff --git a/clang/test/CodeGen/lto-newpm-pipeline.c b/clang/test/CodeGen/lto-newpm-pipeline.c --- a/clang/test/CodeGen/lto-newpm-pipeline.c +++ b/clang/test/CodeGen/lto-newpm-pipeline.c @@ -29,6 +29,7 @@ // CHECK-FULL-O0: Running pass: AlwaysInlinerPass // CHECK-FULL-O0-NEXT: Running analysis: InnerAnalysisManagerProxy // CHECK-FULL-O0-NEXT: Running analysis: ProfileSummaryAnalysis +// CHECK-FULL-O0-NEXT: Running pass: LowerThreadLocalIntrinsicPass // CHECK-FULL-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-FULL-O0-NEXT: Running pass: NameAnonGlobalPass // CHECK-FULL-O0-NEXT: Running pass: BitcodeWriterPass @@ -38,6 +39,7 @@ // CHECK-THIN-O0: Running pass: AlwaysInlinerPass // CHECK-THIN-O0-NEXT: Running analysis: InnerAnalysisManagerProxy // CHECK-THIN-O0-NEXT: Running analysis: ProfileSummaryAnalysis +// CHECK-THIN-O0-NEXT: Running pass: LowerThreadLocalIntrinsicPass // CHECK-THIN-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-THIN-O0-NEXT: Running pass: NameAnonGlobalPass // CHECK-THIN-O0-NEXT: Running pass: ThinLTOBitcodeWriterPass diff --git a/clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp b/clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp --- a/clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp +++ b/clang/test/CodeGenCXX/cxx2a-thread-local-constinit.cpp @@ -31,7 +31,8 @@ // CHECK-LABEL: define i32 @_Z5get_bv() // CHECK-NOT: call -// CHECK: load i32, i32* @b +// CHECK: %[[TMP:.+]] = bitcast i8* bitcast (i32* @b to i8*) to i32* +// CHECK-NEXT: load i32, i32* %[[TMP]] // CHECK-NOT: call // CHECK: } int get_b() { return b; } @@ -52,7 +53,8 @@ // LINUX-LABEL: define weak_odr {{.*}} @_ZTW1c() // CHECK-NOT: br i1 // CHECK-NOT: call -// CHECK: ret i32* @c +// CHECK: %[[TMP:.+]] = bitcast i8* bitcast (i32* @c to i8*) to i32* +// CHECK: ret i32* %[[TMP]] // CHECK: } 
thread_local int c = 0; diff --git a/clang/test/CodeGenCoroutines/coro-tls.cpp b/clang/test/CodeGenCoroutines/coro-tls.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-tls.cpp @@ -0,0 +1,54 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fcoroutines-ts -std=c++14 -O3 -emit-llvm %s -o - | FileCheck %s + +#include "Inputs/coroutine.h" + +namespace coro = std::experimental::coroutines_v1; + +struct awaitable { + bool await_ready() { return false; } + void await_suspend(coro::coroutine_handle<> h); + void await_resume() {} +}; +awaitable switch_to_new_thread(); + +struct task { + struct promise_type { + task get_return_object() { return {}; } + coro::suspend_never initial_suspend() { return {}; } + coro::suspend_never final_suspend() noexcept { return {}; } + void return_void() {} + void unhandled_exception() {} + }; +}; + +void check(int *i, int *j); + +thread_local int tls_variable = 0; + +bool non_coroutine() { + auto *i = &tls_variable; + auto *j = &tls_variable; + return i == j; +} + +// CHECK-LABEL: define zeroext i1 @_Z13non_coroutinev() +// CHECK-NEXT: entry: +// CHECK-NEXT: ret i1 true + + +task resuming_on_new_thread() { + auto *i = &tls_variable; + co_await switch_to_new_thread(); + auto *j = &tls_variable; + check(i, j); +} + +// This test checks that the two arguments passed to "check" will be different. +// The first one will be a value loaded from the frame, and the second is +// the current address of tls_variable. 
+ +// CHECK-LABEL: define internal fastcc void @_Z22resuming_on_new_threadv.resume +// CHECK: %[[RELOAD_ADDR:.+reload.addr]] = getelementptr inbounds %_Z22resuming_on_new_threadv.Frame, %_Z22resuming_on_new_threadv.Frame* %FramePtr +// CHECK: %[[TMP:.+]] = bitcast i8** %[[RELOAD_ADDR]] to i32** +// CHECK: %[[RELOAD:.+]] = load i32*, i32** %[[TMP]] +// CHECK: tail call void @_Z5checkPiS_(i32* %[[RELOAD]], i32* nonnull @tls_variable) diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -751,6 +751,9 @@ /// If the pointer isn't i8* it will be converted. CallInst *CreateInvariantStart(Value *Ptr, ConstantInt *Size = nullptr); + /// Create a threadlocal intrinsic. + CallInst *CreateThreadLocal(Value *Ptr); + /// Create a call to Masked Load intrinsic LLVM_ATTRIBUTE_DEPRECATED( CallInst *CreateMaskedLoad(Value *Ptr, unsigned Alignment, Value *Mask, diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1306,6 +1306,10 @@ def int_ptrmask: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_anyint_ty], [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; + +// Intrinsic to obtain the address of a thread_local variable. 
+def int_threadlocal : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty]>; + //===---------------- Vector Predication Intrinsics --------------===// // Speculatable Binary operators diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -268,6 +268,7 @@ void initializeLowerIntrinsicsPass(PassRegistry&); void initializeLowerInvokeLegacyPassPass(PassRegistry&); void initializeLowerSwitchLegacyPassPass(PassRegistry &); +void initializeLowerThreadLocalIntrinsicLegacyPassPass(PassRegistry &); void initializeLowerTypeTestsPass(PassRegistry&); void initializeLowerMatrixIntrinsicsLegacyPassPass(PassRegistry &); void initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(PassRegistry &); diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -383,6 +383,13 @@ // Pass *createLowerMatrixIntrinsicsMinimalPass(); +//===----------------------------------------------------------------------===// +// +// createLowerThreadLocalIntrinsic - Lower threadlocal intrinsics to direct +// reference of the thread_local variable. +// +Pass *createLowerThreadLocalIntrinsicPass(); + //===----------------------------------------------------------------------===// // // LowerWidenableCondition - Lower widenable condition to i1 true. diff --git a/llvm/include/llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h b/llvm/include/llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h @@ -0,0 +1,29 @@ +//===--- LowerThreadLocalIntrinsic.h - Lower the threadlocal intrinsic +//---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass lowers the llvm.threadlocal intrinsic to a direct reference to the +// thread local variable. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_SCALAR_LOWERTHREADLOCALINTRINSIC_H +#define LLVM_TRANSFORMS_SCALAR_LOWERTHREADLOCALINTRINSIC_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +struct LowerThreadLocalIntrinsicPass + : PassInfoMixin<LowerThreadLocalIntrinsicPass> { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } +}; + +} // namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_LOWERTHREADLOCALINTRINSIC_H diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -452,6 +452,12 @@ return createCallHelper(TheFn, Ops, this); } +CallInst *IRBuilderBase::CreateThreadLocal(Value *Ptr) { + return CreateIntrinsic( + llvm::Intrinsic::threadlocal, llvm::None, + {CreatePointerBitCastOrAddrSpaceCast(Ptr, getInt8PtrTy())}); +} + CallInst * IRBuilderBase::CreateAssumption(Value *Cond, ArrayRef<OperandBundleDef> OpBundles) { diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -181,6 +181,7 @@ #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h" #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" +#include "llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h" #include "llvm/Transforms/Scalar/LowerWidenableCondition.h" #include "llvm/Transforms/Scalar/MakeGuardsExplicit.h" #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" @@ -1385,6 +1386,8 @@ // Add the core simplification pipeline. 
MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::None)); + MPM.addPass(LowerThreadLocalIntrinsicPass()); + // Now add the optimization pipeline. MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink)); @@ -1836,6 +1839,8 @@ MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass())); } + MPM.addPass(LowerThreadLocalIntrinsicPass()); + for (auto &C : OptimizerLastEPCallbacks) C(MPM, Level); diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -558,6 +558,7 @@ } addExtensionsToPM(EP_EnabledOnOptLevel0, MPM); + MPM.add(createLowerThreadLocalIntrinsicPass()); if (PrepareForLTO || PrepareForThinLTO) { MPM.add(createCanonicalizeAliasesPass()); @@ -669,6 +670,7 @@ // pass manager that we are specifically trying to avoid. To prevent this // we must insert a no-op module pass to reset the pass manager. MPM.add(createBarrierNoopPass()); + MPM.add(createLowerThreadLocalIntrinsicPass()); if (RunPartialInlining) MPM.add(createPartialInliningPass()); diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt --- a/llvm/lib/Transforms/Scalar/CMakeLists.txt +++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -50,6 +50,7 @@ LowerExpectIntrinsic.cpp LowerGuardIntrinsic.cpp LowerMatrixIntrinsics.cpp + LowerThreadLocalIntrinsic.cpp LowerWidenableCondition.cpp MakeGuardsExplicit.cpp MemCpyOptimizer.cpp diff --git a/llvm/lib/Transforms/Scalar/LowerThreadLocalIntrinsic.cpp b/llvm/lib/Transforms/Scalar/LowerThreadLocalIntrinsic.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Transforms/Scalar/LowerThreadLocalIntrinsic.cpp @@ -0,0 +1,75 @@ +//===- LowerThreadLocalIntrinsic.cpp - Lower the threadlocal intrinsic +//---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass lowers the llvm.threadlocal intrinsic to a direct reference to the +// thread local variable. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +static bool lowerThreadLocalIntrinsic(Module &M) { + // Check if we can cheaply rule out the possibility of not having any work to + // do. + Function *ThreadLocalDecl = + M.getFunction(Intrinsic::getName(Intrinsic::threadlocal)); + if (!ThreadLocalDecl || ThreadLocalDecl->use_empty()) + return false; + + for (auto Itr = ThreadLocalDecl->users().begin(), + E = ThreadLocalDecl->users().end(); + Itr != E;) { + Instruction *I = cast<Instruction>(*Itr); + ++Itr; + I->replaceAllUsesWith(I->getOperand(0)); + I->eraseFromParent(); + } + + ThreadLocalDecl->eraseFromParent(); + + return true; +} + +PreservedAnalyses +LowerThreadLocalIntrinsicPass::run(Module &M, ModuleAnalysisManager &AM) { + if (lowerThreadLocalIntrinsic(M)) + return PreservedAnalyses::none(); + + return PreservedAnalyses::all(); +} + +namespace { +struct LowerThreadLocalIntrinsicLegacyPass : public ModulePass { + static char ID; + LowerThreadLocalIntrinsicLegacyPass() : ModulePass(ID) { + initializeLowerThreadLocalIntrinsicLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnModule(Module &M) override; +}; +} // namespace + +bool LowerThreadLocalIntrinsicLegacyPass::runOnModule(Module &M) { + return lowerThreadLocalIntrinsic(M); +} + +char LowerThreadLocalIntrinsicLegacyPass::ID = 0; +INITIALIZE_PASS(LowerThreadLocalIntrinsicLegacyPass, + "lower-threadlocal-intrinsic", "Lower ThreadLocal Intrinsics", + 
false, false) + +Pass *llvm::createLowerThreadLocalIntrinsicPass() { + return new LowerThreadLocalIntrinsicLegacyPass(); +} diff --git a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/opt-pipeline.ll @@ -192,6 +192,7 @@ ; GCN-O1-NEXT: Optimization Remark Emitter ; GCN-O1-NEXT: Combine redundant instructions ; GCN-O1-NEXT: A No-Op Barrier Pass +; GCN-O1-NEXT: Lower ThreadLocal Intrinsics ; GCN-O1-NEXT: CallGraph Construction ; GCN-O1-NEXT: Deduce function attributes in RPO ; GCN-O1-NEXT: Global Variable Optimizer @@ -543,6 +544,7 @@ ; GCN-O2-NEXT: Optimization Remark Emitter ; GCN-O2-NEXT: Combine redundant instructions ; GCN-O2-NEXT: A No-Op Barrier Pass +; GCN-O2-NEXT: Lower ThreadLocal Intrinsics ; GCN-O2-NEXT: Eliminate Available Externally Globals ; GCN-O2-NEXT: CallGraph Construction ; GCN-O2-NEXT: Deduce function attributes in RPO @@ -907,6 +909,7 @@ ; GCN-O3-NEXT: Optimization Remark Emitter ; GCN-O3-NEXT: Combine redundant instructions ; GCN-O3-NEXT: A No-Op Barrier Pass +; GCN-O3-NEXT: Lower ThreadLocal Intrinsics ; GCN-O3-NEXT: Eliminate Available Externally Globals ; GCN-O3-NEXT: CallGraph Construction ; GCN-O3-NEXT: Deduce function attributes in RPO diff --git a/llvm/test/Other/new-pass-manager.ll b/llvm/test/Other/new-pass-manager.ll --- a/llvm/test/Other/new-pass-manager.ll +++ b/llvm/test/Other/new-pass-manager.ll @@ -366,6 +366,7 @@ ; CHECK-EXT-NEXT: Starting llvm::Function pass manager run. ; CHECK-EXT-NEXT: Running pass: {{.*}}Bye ; CHECK-EXT-NEXT: Finished llvm::Function pass manager run. 
+; CHECK-O0-NEXT: Running pass: LowerThreadLocalIntrinsicPass ; CHECK-O0-NEXT: Finished llvm::Module pass manager run ; RUN: opt -disable-output -disable-verify -debug-pass-manager \ diff --git a/llvm/test/Other/new-pm-O0-defaults.ll b/llvm/test/Other/new-pm-O0-defaults.ll --- a/llvm/test/Other/new-pm-O0-defaults.ll +++ b/llvm/test/Other/new-pm-O0-defaults.ll @@ -32,6 +32,7 @@ ; CHECK-DEFAULT-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-MATRIX-NEXT: Running pass: LowerMatrixIntrinsicsPass ; CHECK-MATRIX-NEXT: Running analysis: TargetIRAnalysis +; CHECK-DEFAULT-NEXT: Running pass: LowerThreadLocalIntrinsicPass ; CHECK-PRE-LINK-NEXT: Running pass: CanonicalizeAliasesPass ; CHECK-PRE-LINK-NEXT: Running pass: NameAnonGlobalPass ; CHECK-THINLTO-NEXT: Running pass: Annotation2MetadataPass diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -209,6 +209,7 @@ ; CHECK-EP-CGSCC-LATE-NEXT: Running pass: NoOpCGSCCPass ; CHECK-O-NEXT: Finished CGSCC pass manager run. ; CHECK-O-NEXT: Finished llvm::Module pass manager run. 
+; CHECK-O-NEXT: Running pass: LowerThreadLocalIntrinsicPass ; CHECK-O-NEXT: Running pass: GlobalOptPass ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-DEFAULT-NEXT: Running pass: EliminateAvailableExternallyPass diff --git a/llvm/test/Other/opt-O2-pipeline.ll b/llvm/test/Other/opt-O2-pipeline.ll --- a/llvm/test/Other/opt-O2-pipeline.ll +++ b/llvm/test/Other/opt-O2-pipeline.ll @@ -187,6 +187,7 @@ ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions ; CHECK-NEXT: A No-Op Barrier Pass +; CHECK-NEXT: Lower ThreadLocal Intrinsics ; CHECK-NEXT: Eliminate Available Externally Globals ; CHECK-NEXT: CallGraph Construction ; CHECK-NEXT: Deduce function attributes in RPO diff --git a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll --- a/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll +++ b/llvm/test/Other/opt-O3-pipeline-enable-matrix.ll @@ -192,6 +192,7 @@ ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions ; CHECK-NEXT: A No-Op Barrier Pass +; CHECK-NEXT: Lower ThreadLocal Intrinsics ; CHECK-NEXT: Eliminate Available Externally Globals ; CHECK-NEXT: CallGraph Construction ; CHECK-NEXT: Deduce function attributes in RPO diff --git a/llvm/test/Other/opt-O3-pipeline.ll b/llvm/test/Other/opt-O3-pipeline.ll --- a/llvm/test/Other/opt-O3-pipeline.ll +++ b/llvm/test/Other/opt-O3-pipeline.ll @@ -192,6 +192,7 @@ ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine redundant instructions ; CHECK-NEXT: A No-Op Barrier Pass +; CHECK-NEXT: Lower ThreadLocal Intrinsics ; CHECK-NEXT: Eliminate Available Externally Globals ; CHECK-NEXT: CallGraph Construction ; CHECK-NEXT: Deduce function attributes in RPO diff --git a/llvm/test/Other/opt-Os-pipeline.ll b/llvm/test/Other/opt-Os-pipeline.ll --- a/llvm/test/Other/opt-Os-pipeline.ll +++ b/llvm/test/Other/opt-Os-pipeline.ll @@ -173,6 +173,7 @@ ; CHECK-NEXT: Optimization Remark Emitter ; CHECK-NEXT: Combine 
redundant instructions ; CHECK-NEXT: A No-Op Barrier Pass +; CHECK-NEXT: Lower ThreadLocal Intrinsics ; CHECK-NEXT: Eliminate Available Externally Globals ; CHECK-NEXT: CallGraph Construction ; CHECK-NEXT: Deduce function attributes in RPO diff --git a/llvm/test/Other/pass-pipelines.ll b/llvm/test/Other/pass-pipelines.ll --- a/llvm/test/Other/pass-pipelines.ll +++ b/llvm/test/Other/pass-pipelines.ll @@ -72,6 +72,7 @@ ; Next we break out of the main Function passes inside the CGSCC pipeline with ; a barrier pass. ; CHECK-O2: A No-Op Barrier Pass +; CHECK-O2-NEXT: Lower ThreadLocal Intrinsics ; CHECK-O2-NEXT: Eliminate Available Externally ; Inferring function attribute should be right after the CGSCC pipeline, before ; any other optimizations/analyses.