Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -2604,6 +2604,15 @@ } llvm::Value *V = CGF.CGM.GetAddrOfGlobalVar(VD); + + if (VD->getTLSKind() != VarDecl::TLS_None && + // We only use @llvm.threadlocal.address if opaque pointers enabled. + // Otherwise, we need to pay for many unnecessary bitcasts. + // + // TODO: Remove this condition once we support opaque pointers only. + CGF.CGM.getCodeGenOpts().OpaquePointers) + V = CGF.Builder.CreateThreadLocalAddress(V); + llvm::Type *RealVarTy = CGF.getTypes().ConvertTypeForMem(VD->getType()); V = EmitBitCastOfLValueToProperType(CGF, V, RealVarTy); CharUnits Alignment = CGF.getContext().getDeclAlign(VD); Index: clang/lib/CodeGen/ItaniumCXXABI.cpp =================================================================== --- clang/lib/CodeGen/ItaniumCXXABI.cpp +++ clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -2990,6 +2990,11 @@ if (Val->getType() != Wrapper->getReturnType()) Val = Builder.CreatePointerBitCastOrAddrSpaceCast( Val, Wrapper->getReturnType(), ""); + + // TODO: Remove this condition once we support opaque pointers only. 
+ if (CGM.getCodeGenOpts().OpaquePointers) + Val = Builder.CreateThreadLocalAddress(Val); + Builder.CreateRet(Val); } } Index: clang/test/CodeGen/lto-newpm-pipeline.c =================================================================== --- clang/test/CodeGen/lto-newpm-pipeline.c +++ clang/test/CodeGen/lto-newpm-pipeline.c @@ -29,6 +29,7 @@ // CHECK-FULL-O0-NEXT: Running analysis: InnerAnalysisManagerProxy // CHECK-FULL-O0-NEXT: Running analysis: ProfileSummaryAnalysis // CHECK-FULL-O0-NEXT: Running pass: CoroConditionalWrapper +// CHECK-FULL-O0-NEXT: Running pass: LowerThreadLocalAddress // CHECK-FULL-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-FULL-O0-NEXT: Running pass: NameAnonGlobalPass // CHECK-FULL-O0-NEXT: Running pass: AnnotationRemarksPass @@ -41,6 +42,7 @@ // CHECK-THIN-O0-NEXT: Running analysis: InnerAnalysisManagerProxy // CHECK-THIN-O0-NEXT: Running analysis: ProfileSummaryAnalysis // CHECK-THIN-O0-NEXT: Running pass: CoroConditionalWrapper +// CHECK-THIN-O0-NEXT: Running pass: LowerThreadLocalAddress // CHECK-THIN-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-THIN-O0-NEXT: Running pass: NameAnonGlobalPass // CHECK-THIN-O0-NEXT: Running pass: AnnotationRemarksPass Index: clang/test/CodeGenCXX/threadlocal_address.cpp =================================================================== --- /dev/null +++ clang/test/CodeGenCXX/threadlocal_address.cpp @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -std=c++11 -emit-llvm -triple %itanium_abi_triple -o - %s -disable-llvm-passes | FileCheck %s +// RUN: %clang_cc1 -std=c++11 -emit-llvm -triple %itanium_abi_triple -o - %s | FileCheck %s -check-prefix=CHECK-LLVM-PASSES +// RUN: %clang_cc1 -std=c++11 -emit-llvm -triple %itanium_abi_triple -o - -O1 %s | FileCheck %s -check-prefix=CHECK-O1 +thread_local int i; +int g() { + i++; + return i; +} +// CHECK: @i = thread_local global i32 0 +// +// CHECK: @_Z1gv() +// CHECK-NEXT: entry +// CHECK-NEXT: %[[IA:.+]] = call ptr @llvm.threadlocal.address(ptr @i) +// 
CHECK-NEXT: %[[VA:.+]] = load i32, ptr %[[IA]] +// CHECK-NEXT: %[[INC:.+]] = add nsw i32 %[[VA]], 1 +// CHECK-NEXT: store i32 %[[INC]], ptr %[[IA]], align 4 +// CHECK-NEXT: %[[IA2:.+]] = call ptr @llvm.threadlocal.address(ptr @i) +// CHECK-NEXT: %[[RET:.+]] = load i32, ptr %[[IA2]], align 4 +// CHECK-NEXT: ret i32 %[[RET]] +// +// CHECK: declare ptr @llvm.threadlocal.address(ptr) #[[ATTR_NUM:.+]] +// +// CHECK: attributes #[[ATTR_NUM]] = { nounwind readnone willreturn } + +// CHECK-LLVM-PASSES-NOT: llvm.threadlocal.address + +// CHECK-O1-LABEL: @_Z1gv +// CHECK-O1-NEXT: entry: +// CHECK-O1-NEXT: %[[VAL:.+]] = load i32, ptr @i +// CHECK-O1-NEXT: %[[INC:.+]] = add nsw i32 %[[VAL]], 1 +// CHECK-O1-NEXT: store i32 %[[INC]], ptr @i +// CHECK-O1-NEXT: ret i32 %[[INC]] Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -24372,6 +24372,36 @@ mask argument does not match the pointer size of the target, the mask is zero-extended or truncated accordingly. +.. _int_threadlocal_address: + +'``llvm.threadlocal.address``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +:: + + declare ptr @llvm.threadlocal.address(ptr) nounwind readnone willreturn + +Arguments: +"""""""""" + +The first argument is a pointer that refers to a thread local variable. + +Overview: +"""""""""" + +LLVM treats the address of a thread local variable as a constant expression, +but it actually depends on the calling thread. The ``llvm.threadlocal.address`` +intrinsic represents the address of the thread local variable. + +Semantics: +"""""""""" + +The ``llvm.threadlocal.address`` intrinsic returns the address of the +corresponding thread local variable. + .. 
_int_vscale:
 
 '``llvm.vscale``' Intrinsic
Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -639,6 +639,7 @@
     case Intrinsic::coro_align:
     case Intrinsic::coro_suspend:
     case Intrinsic::coro_subfn_addr:
+    case Intrinsic::threadlocal_address:
       // These intrinsics don't actually represent code after lowering.
       return 0;
     }
Index: llvm/include/llvm/IR/IRBuilder.h
===================================================================
--- llvm/include/llvm/IR/IRBuilder.h
+++ llvm/include/llvm/IR/IRBuilder.h
@@ -738,6 +738,9 @@
   /// If the pointer isn't i8* it will be converted.
   CallInst *CreateInvariantStart(Value *Ptr, ConstantInt *Size = nullptr);
 
+  /// Create a call to llvm.threadlocal.address intrinsic.
+  CallInst *CreateThreadLocalAddress(Value *Ptr);
+
   /// Create a call to Masked Load intrinsic
   CallInst *CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment, Value *Mask,
                              Value *PassThru = nullptr, const Twine &Name = "");
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1390,6 +1390,9 @@
 def int_ptrmask: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_anyint_ty],
                            [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
 
+def int_threadlocal_address : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty],
+                                        [IntrNoMem, IntrWillReturn]>;
+
 def int_experimental_stepvector : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                                                         [], [IntrNoMem]>;
 
Index: llvm/include/llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h
@@ -0,0 +1,33 @@
+//===- LowerThreadLocalIntrinsic.h - Lower threadlocal.address --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers llvm.threadlocal.address intrinsics down to a direct use of
+// thread local variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOWERTHREADLOCALINTRINSIC_H
+#define LLVM_TRANSFORMS_SCALAR_LOWERTHREADLOCALINTRINSIC_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// Module pass that replaces every call to llvm.threadlocal.address with the
+/// call's pointer operand and then removes the intrinsic declaration.
+class LowerThreadLocalAddress : public PassInfoMixin<LowerThreadLocalAddress> {
+public:
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+
+  /// Tell the pass manager that this pass may not be skipped.
+  static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOWERTHREADLOCALINTRINSIC_H
Index: llvm/lib/IR/IRBuilder.cpp
===================================================================
--- llvm/lib/IR/IRBuilder.cpp
+++ llvm/lib/IR/IRBuilder.cpp
@@ -499,6 +499,11 @@
   return createCallHelper(TheFn, Ops, this);
 }
 
+CallInst *IRBuilderBase::CreateThreadLocalAddress(Value *Ptr) {
+  return CreateIntrinsic(llvm::Intrinsic::threadlocal_address, llvm::None,
+                         {Ptr});
+}
+
 CallInst *
 IRBuilderBase::CreateAssumption(Value *Cond,
                                 ArrayRef<OperandBundleDef> OpBundles) {
Index: llvm/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/lib/Passes/PassBuilder.cpp
+++ llvm/lib/Passes/PassBuilder.cpp
@@ -191,6 +191,7 @@
 #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
 #include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
 #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
+#include "llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h"
 #include "llvm/Transforms/Scalar/LowerWidenableCondition.h"
 #include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
Index: 
llvm/lib/Passes/PassBuilderPipelines.cpp =================================================================== --- llvm/lib/Passes/PassBuilderPipelines.cpp +++ llvm/lib/Passes/PassBuilderPipelines.cpp @@ -104,6 +104,7 @@ #include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h" #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h" +#include "llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h" #include "llvm/Transforms/Scalar/MemCpyOptimizer.h" #include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" #include "llvm/Transforms/Scalar/NewGVN.h" @@ -1288,6 +1289,8 @@ if (!LTOPreLink) MPM.addPass(RelLookupTableConverterPass()); + MPM.addPass(LowerThreadLocalAddress()); + return MPM; } @@ -1840,6 +1843,8 @@ CoroPM.addPass(GlobalDCEPass()); MPM.addPass(CoroConditionalWrapper(std::move(CoroPM))); + MPM.addPass(LowerThreadLocalAddress()); + for (auto &C : OptimizerLastEPCallbacks) C(MPM, Level); Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -80,6 +80,7 @@ MODULE_PASS("iroutliner", IROutlinerPass()) MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(dbgs())) MODULE_PASS("lower-global-dtors", LowerGlobalDtorsPass()) +MODULE_PASS("lower-threadlocal-address", LowerThreadLocalAddress()) MODULE_PASS("lowertypetests", LowerTypeTestsPass()) MODULE_PASS("metarenamer", MetaRenamerPass()) MODULE_PASS("mergefunc", MergeFunctionsPass()) Index: llvm/lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Scalar/CMakeLists.txt +++ llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -51,6 +51,7 @@ LowerExpectIntrinsic.cpp LowerGuardIntrinsic.cpp LowerMatrixIntrinsics.cpp + LowerThreadLocalIntrinsic.cpp LowerWidenableCondition.cpp MakeGuardsExplicit.cpp MemCpyOptimizer.cpp Index: 
llvm/lib/Transforms/Scalar/LowerThreadLocalIntrinsic.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Transforms/Scalar/LowerThreadLocalIntrinsic.cpp
@@ -0,0 +1,52 @@
+//===- LowerThreadLocalIntrinsic.cpp - Lower threadlocal intrinsics -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers llvm.threadlocal.address intrinsics down to a direct use of
+// thread local variables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/LowerThreadLocalIntrinsic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+
+using namespace llvm;
+
+/// Replace every call to llvm.threadlocal.address in \p M with the call's
+/// pointer operand, then drop the intrinsic declaration.
+///
+/// \returns PreservedAnalyses::all() when the module does not declare the
+/// intrinsic; otherwise a set that only preserves the CFG analyses.
+PreservedAnalyses LowerThreadLocalAddress::run(Module &M,
+                                               ModuleAnalysisManager &MAM) {
+  // Look the declaration up by name instead of calling
+  // Intrinsic::getDeclaration(), which would insert a declaration into modules
+  // that never used the intrinsic only for it to be erased again below.
+  Function *ThreadLocalDecl =
+      M.getFunction(Intrinsic::getName(Intrinsic::threadlocal_address));
+  if (!ThreadLocalDecl)
+    return PreservedAnalyses::all();
+
+  // Erasing a call removes it from the use list being walked, hence the
+  // early-increment range.
+  for (User *U : make_early_inc_range(ThreadLocalDecl->users())) {
+    auto *Call = cast<CallInst>(U);
+    Call->replaceAllUsesWith(Call->getArgOperand(0));
+    Call->eraseFromParent();
+  }
+
+  ThreadLocalDecl->eraseFromParent();
+
+  // Only call instructions and the declaration were removed.
+  PreservedAnalyses PA;
+  PA.preserveSet<CFGAnalyses>();
+  return PA;
+}
Index: llvm/test/Other/new-pm-O0-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-O0-defaults.ll
+++ llvm/test/Other/new-pm-O0-defaults.ll
@@ -39,6 +39,7 @@
 ; CHECK-MATRIX: Running pass: LowerMatrixIntrinsicsPass
 ; CHECK-MATRIX-NEXT: Running analysis: TargetIRAnalysis
 ; CHECK-CORO-NEXT: Running pass: CoroConditionalWrapper
+; CHECK-CORO-NEXT: Running pass: LowerThreadLocalAddress
 ; CHECK-PRE-LINK: Running pass: CanonicalizeAliasesPass
 ; CHECK-PRE-LINK-NEXT: Running pass: NameAnonGlobalPass
 ; CHECK-THINLTO: Running pass: Annotation2MetadataPass
Index: llvm/test/Other/new-pm-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-defaults.ll
+++ llvm/test/Other/new-pm-defaults.ll
@@ -270,6 +270,7 @@
 ; CHECK-O-NEXT: Running pass: ConstantMergePass
 ; CHECK-DEFAULT-NEXT: Running pass: RelLookupTableConverterPass
 ; CHECK-LTO-NOT: Running pass: RelLookupTableConverterPass
+; CHECK-O-NEXT: Running pass: LowerThreadLocalAddress
 ; CHECK-O-NEXT: Running pass: AnnotationRemarksPass on foo
 ; CHECK-LTO-NEXT: Running pass: CanonicalizeAliasesPass
 ; CHECK-LTO-NEXT: Running pass: NameAnonGlobalPass
Index: llvm/test/Other/new-pm-thinlto-defaults.ll
===================================================================
--- llvm/test/Other/new-pm-thinlto-defaults.ll
+++ llvm/test/Other/new-pm-thinlto-defaults.ll
@@ -228,6 +228,7 @@
 ; CHECK-POSTLINK-O-NEXT: Running pass: GlobalDCEPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: ConstantMergePass
 ; CHECK-POSTLINK-O-NEXT: Running pass: RelLookupTableConverterPass
+; CHECK-POSTLINK-O-NEXT: Running pass: LowerThreadLocalAddress
 ; CHECK-O-NEXT: Running pass: AnnotationRemarksPass on foo
 ; CHECK-PRELINK-O-NEXT: Running pass: CanonicalizeAliasesPass
 ; CHECK-PRELINK-O-NEXT: Running pass: NameAnonGlobalPass
Index: llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll
=================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -197,6 +197,7 @@ ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: ConstantMergePass ; CHECK-O-NEXT: Running pass: RelLookupTableConverterPass +; CHECK-O-NEXT: Running pass: LowerThreadLocalAddress ; CHECK-O-NEXT: Running pass: AnnotationRemarksPass on foo ; CHECK-O-NEXT: Running pass: PrintModulePass Index: llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -209,6 +209,7 @@ ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: ConstantMergePass ; CHECK-O-NEXT: Running pass: RelLookupTableConverterPass +; CHECK-O-NEXT: Running pass: LowerThreadLocalAddress ; CHECK-O-NEXT: Running pass: AnnotationRemarksPass on foo ; CHECK-O-NEXT: Running pass: PrintModulePass Index: llvm/test/Transforms/LowerThreadLocalIntrinsic/basic.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LowerThreadLocalIntrinsic/basic.ll @@ -0,0 +1,25 @@ +; RUN: opt -passes=lower-threadlocal-address -opaque-pointers -S -o - < %s | FileCheck %s + +@i = thread_local global i32 0, align 4 + +define dso_local noundef i32 @foo() { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @i, align 4 +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 +; CHECK-NEXT: store i32 [[INC]], ptr @i, align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr @i, align 4 +; CHECK-NEXT: ret i32 [[TMP1]] +; +; CHECK-NOT: llvm.threadlocal.address +entry: + %0 = call ptr @llvm.threadlocal.address(ptr @i) + %1 = load i32, ptr %0, align 4 + %inc = add nsw i32 %1, 1 + store i32 %inc, ptr %0, align 4 
+ %2 = call ptr @llvm.threadlocal.address(ptr @i) + %3 = load i32, ptr %2, align 4 + ret i32 %3 +} + +declare ptr @llvm.threadlocal.address(ptr) nounwind readnone willreturn