diff --git a/lld/test/MachO/lto-internalize-unnamed-addr.ll b/lld/test/MachO/lto-internalize-unnamed-addr.ll --- a/lld/test/MachO/lto-internalize-unnamed-addr.ll +++ b/lld/test/MachO/lto-internalize-unnamed-addr.ll @@ -23,9 +23,9 @@ ; RUN: %lld -lSystem -dylib %t/test.thinlto.o %t/test2.thinlto.o -o \ ; RUN: %t/test.thinlto.dylib -save-temps -; RUN: llvm-dis < %t/test.thinlto.o.2.internalize.bc | FileCheck %s --check-prefix=THINLTO-BC +; RUN: llvm-dis < %t/test.thinlto.o.2.internalize.bc | FileCheck %s --check-prefix=THINLTO-BC-DYLIB ; RUN: llvm-dis < %t/test2.thinlto.o.2.internalize.bc | FileCheck %s --check-prefix=THINLTO-BC-2 -; RUN: llvm-nm -m %t/test.thinlto.dylib | FileCheck %s --check-prefix=THINLTO +; RUN: llvm-nm -m %t/test.thinlto.dylib | FileCheck %s --check-prefix=THINLTO-DYLIB ; LTO-BC-DAG: @global_unnamed = internal unnamed_addr global i8 42 ; LTO-BC-DAG: @global_unnamed_sometimes_linkonce = internal unnamed_addr global i8 42 @@ -41,12 +41,19 @@ ; LTO-BC-DYLIB-DAG: @local_unnamed_always_const = internal constant i8 42 ; LTO-BC-DYLIB-DAG: @local_unnamed_sometimes_const = weak_odr constant i8 42 -; THINLTO-BC-DAG: @global_unnamed = weak_odr hidden unnamed_addr global i8 42 +; THINLTO-BC-DAG: @global_unnamed = internal unnamed_addr global i8 42 ; THINLTO-BC-DAG: @global_unnamed_sometimes_linkonce = weak_odr unnamed_addr global i8 42 -; THINLTO-BC-DAG: @local_unnamed_const = weak_odr hidden local_unnamed_addr constant i8 42 +; THINLTO-BC-DAG: @local_unnamed_const = internal local_unnamed_addr constant i8 42 ; THINLTO-BC-DAG: @local_unnamed_always_const = weak_odr hidden local_unnamed_addr constant i8 42 ; THINLTO-BC-DAG: @local_unnamed_sometimes_const = weak_odr local_unnamed_addr constant i8 42 -; THINLTO-BC-DAG: @local_unnamed = weak_odr local_unnamed_addr global i8 42 +; THINLTO-BC-DAG: @local_unnamed = internal local_unnamed_addr global i8 42 + +; THINLTO-BC-DYLIB-DAG: @global_unnamed = internal unnamed_addr global i8 42 +; THINLTO-BC-DYLIB-DAG: @global_unnamed_sometimes_linkonce = weak_odr unnamed_addr global i8 42 +; THINLTO-BC-DYLIB-DAG: @local_unnamed_const = internal local_unnamed_addr constant i8 42 +; THINLTO-BC-DYLIB-DAG: @local_unnamed_always_const = weak_odr hidden local_unnamed_addr constant i8 42 +; THINLTO-BC-DYLIB-DAG: @local_unnamed_sometimes_const = weak_odr local_unnamed_addr constant i8 42 +; THINLTO-BC-DYLIB-DAG: @local_unnamed = weak_odr local_unnamed_addr global i8 42 ; THINLTO-BC-2-DAG: @global_unnamed_sometimes_linkonce = available_externally unnamed_addr global i8 42 ; THINLTO-BC-2-DAG: @local_unnamed_always_const = available_externally local_unnamed_addr constant i8 42 @@ -73,18 +80,25 @@ ; LTO-DYLIB-DAG: (__TEXT,__const) non-external _local_unnamed_const ; LTO-DYLIB-DAG: (__TEXT,__const) weak external _local_unnamed_sometimes_const -; THINLTO-DAG: (__DATA,__data) non-external (was a private external) _global_unnamed -;; FIXME: These next two symbols should probably be internalized, just like they -;; are under fullLTO. +; THINLTO-DAG: (__DATA,__data) non-external _global_unnamed +;; FIXME: This next symbol should probably be internalized, just like it is +;; under fullLTO. ; THINLTO-DAG: (__DATA,__data) weak external _global_unnamed_sometimes_linkonce -; THINLTO-DAG: (__DATA,__data) weak external _local_unnamed +; THINLTO-DAG: (__DATA,__data) non-external _local_unnamed ; THINLTO-DAG: (__TEXT,__const) non-external (was a private external) _local_unnamed_always_const -; THINLTO-DAG: (__TEXT,__const) non-external (was a private external) _local_unnamed_const +; THINLTO-DAG: (__TEXT,__const) non-external _local_unnamed_const ;; LD64 actually fails to link when the following symbol is included in the test ;; input, instead producing this error: ;; reference to bitcode symbol '_local_unnamed_sometimes_const' which LTO has not compiled in '_used' from /tmp/lto.o for architecture x86_64 ; THINLTO-DAG: (__TEXT,__const) weak external _local_unnamed_sometimes_const +; THINLTO-DYLIB-DAG: (__DATA,__data) non-external _global_unnamed +; THINLTO-DYLIB-DAG: (__DATA,__data) weak external _global_unnamed_sometimes_linkonce +; THINLTO-DYLIB-DAG: (__DATA,__data) weak external _local_unnamed +; THINLTO-DYLIB-DAG: (__TEXT,__const) non-external (was a private external) _local_unnamed_always_const +; THINLTO-DYLIB-DAG: (__TEXT,__const) non-external _local_unnamed_const +; THINLTO-DYLIB-DAG: (__TEXT,__const) weak external _local_unnamed_sometimes_const + ;--- test.ll target triple = "x86_64-apple-darwin" target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -450,6 +450,12 @@ ValueInfo VI, function_ref isExported, function_ref isPrevailing) { + auto ExternallyVisibleCopies = + llvm::count_if(VI.getSummaryList(), + [](const std::unique_ptr &Summary) { + return !GlobalValue::isLocalLinkage(Summary->linkage()); + }); + for (auto &S : VI.getSummaryList()) { // First see if we need to promote an internal value because it is not // exported. @@ -480,15 +486,50 @@ if (GlobalValue::isInterposableLinkage(S->linkage()) && !IsPrevailing) continue; - // Functions and read-only variables with linkonce_odr and weak_odr linkage - // can be internalized. We can't internalize linkonce_odr and weak_odr - // variables which are both modified and read somewhere in the program - // because reads and writes will become inconsistent. - auto *VarSummary = dyn_cast(S->getBaseObject()); - if (VarSummary && !VarSummary->maybeReadOnly() && - !VarSummary->maybeWriteOnly() && - (VarSummary->linkage() == GlobalValue::WeakODRLinkage || - VarSummary->linkage() == GlobalValue::LinkOnceODRLinkage)) + // Non-exported functions and variables with linkonce_odr or weak_odr + // linkage can be internalized in certain cases. The minimum legality + // requirements would be that they are not address taken to ensure that we + // don't break pointer equality checks, and that variables are either read- + // or write-only. For functions, this is the case if either all copies are + // [local_]unnamed_addr, or we can propagate reference edge attributes + // (which is how this is guaranteed for variables, when analyzing whether + // they are read or write-only). + // + // However, we only get to this code for weak/linkonce ODR values in one of + // two cases: + // 1) The prevailing copy is not in IR (it is in native code). + // 2) The prevailing copy in IR is not exported from its module. + // Additionally, at least for the new LTO API, case 2 will only happen if + // there is exactly one definition of the value (i.e. in exactly one + // module), as duplicate defs are result in the value being marked exported. + // Likely, users of the legacy LTO API are similar, however, currently there + // are llvm-lto based tests of the legacy LTO API that do not mark + // duplicate linkonce_odr copies as exported via the tool, so we need + // to handle that case below by checking the number of copies. + // + // Generally, we only want to internalize a linkonce/weak ODR value in case + // 2, because in case 1 we cannot see how the value is used to know if it + // is read or write-only. We also don't want to bloat the binary with + // multiple internalized copies of non-prevailing linkonce_odr functions. + // Note if we don't internalize, we will convert non-prevailing copies to + // available_externally anyway, so that we drop them after inlining. The + // only reason to internalize such a function is if we indeed have a single + // copy, because internalizing it won't increase binary size, and enables + // use of inliner heuristics that are more aggressive in the face of a + // single call to a static (local). For variables, internalizing a read or + // write only variable can enable more aggressive optimization. However, we + // already perform this elsewhere in the ThinLTO backend handling for + // read or write-only variables (processGlobalForThinLTO). + // + // Therefore, only internalize linkonce/weak ODR if there is a single copy, + // that is prevailing in this IR module. We can do so aggressively, without + // requiring the address to be insignificant, or that a variable be read or + // write-only. + if ((S->linkage() == GlobalValue::WeakODRLinkage || + S->linkage() == GlobalValue::LinkOnceODRLinkage) && + // We can have only one copy in ThinLTO that isn't prevailing, if the + // prevailing copy is in a native object. + (!IsPrevailing || ExternallyVisibleCopies > 1)) continue; S->setLinkage(GlobalValue::InternalLinkage); diff --git a/llvm/test/ThinLTO/X86/not-internalized.ll b/llvm/test/ThinLTO/X86/not-internalized.ll --- a/llvm/test/ThinLTO/X86/not-internalized.ll +++ b/llvm/test/ThinLTO/X86/not-internalized.ll @@ -6,7 +6,7 @@ ; RUN: opt -module-summary %s -o %t.bc ; RUN: llvm-lto2 run -save-temps %t.bc -o %t.out \ ; RUN: -r=%t.bc,foo,plx \ -; RUN: -r=%t.bc,bar,lx +; RUN: -r=%t.bc,bar,pl ; Check that we don't internalize `bar` during promotion, ; because foo and bar are members of the same comdat diff --git a/llvm/test/ThinLTO/X86/weak_externals.ll b/llvm/test/ThinLTO/X86/weak_externals.ll --- a/llvm/test/ThinLTO/X86/weak_externals.ll +++ b/llvm/test/ThinLTO/X86/weak_externals.ll @@ -11,8 +11,13 @@ ; CHECK: @_ZZN9SingletonI1SE11getInstanceEvE8instance = available_externally dso_local global %struct.S zeroinitializer ; CHECK: @_ZZN9SingletonI1SE11getInstanceEvE13instance_weak = available_externally dso_local global ptr null, align 8 -; CHECK: define linkonce_odr dso_local dereferenceable(16) ptr @_ZN9SingletonI1SE11getInstanceEv() comdat -; INTERNALIZE: define internal dereferenceable(16) ptr @_ZN9SingletonI1SE11getInstanceEv() + +;; We should not internalize a linkonce_odr function when the IR definition(s) +;; are not prevailing (prevailing def in native object). This can break function +;; pointer equality (unless it has an unnamed_addr attribute indicating that the +;; address is not significant), and also can increase code size. +; CHECK: define available_externally dso_local dereferenceable(16) ptr @_ZN9SingletonI1SE11getInstanceEv() +; INTERNALIZE: define available_externally dso_local dereferenceable(16) ptr @_ZN9SingletonI1SE11getInstanceEv() target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/ThinLTO/X86/weak_resolution.ll b/llvm/test/ThinLTO/X86/weak_resolution.ll --- a/llvm/test/ThinLTO/X86/weak_resolution.ll +++ b/llvm/test/ThinLTO/X86/weak_resolution.ll @@ -1,33 +1,66 @@ -; Do setup work for all below tests: generate bitcode and combined index +;; Test to ensure we properly resolve weak symbols and internalize them when +;; appropriate. + ; RUN: opt -module-summary %s -o %t.bc ; RUN: opt -module-summary %p/Inputs/weak_resolution.ll -o %t2.bc -; RUN: llvm-lto -thinlto-action=thinlink -o %t3.bc %t.bc %t2.bc -; Verify that prevailing weak for linker symbol is selected across modules, -; non-prevailing ODR are not kept when possible, but non-ODR non-prevailing -; are not affected. +;; First try this with the legacy LTO API +; RUN: llvm-lto -thinlto-action=thinlink -o %t3.bc %t.bc %t2.bc +;; Verify that prevailing weak for linker symbol is selected across modules, +;; non-prevailing ODR are not kept when possible, but non-ODR non-prevailing +;; are not affected. ; RUN: llvm-lto -thinlto-action=promote %t.bc -thinlto-index=%t3.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=MOD1 ; RUN: llvm-lto -thinlto-action=internalize %t.bc -thinlto-index=%t3.bc -exported-symbol=_linkoncefunc -o - | llvm-dis -o - | FileCheck %s --check-prefix=MOD1-INT ; RUN: llvm-lto -thinlto-action=promote %t2.bc -thinlto-index=%t3.bc -o - | llvm-dis -o - | FileCheck %s --check-prefix=MOD2 ; When exported, we always preserve a linkonce ; RUN: llvm-lto -thinlto-action=promote %t.bc -thinlto-index=%t3.bc -o - --exported-symbol=_linkonceodrfuncInSingleModule | llvm-dis -o - | FileCheck %s --check-prefix=EXPORTED +;; Now try this with the new LTO API +; RUN: llvm-lto2 run %t.bc %t2.bc -o %t3.out -save-temps \ +; RUN: -r %t.bc,_linkonceodralias,pl \ +; RUN: -r %t.bc,_linkoncealias,pl \ +; RUN: -r %t.bc,_linkonceodrvarInSingleModule,pl \ +; RUN: -r %t.bc,_weakodrvarInSingleModule,pl \ +; RUN: -r %t.bc,_linkonceodrfuncwithalias,pl \ +; RUN: -r %t.bc,_linkoncefuncwithalias,pl \ +; RUN: -r %t.bc,_linkonceodrfunc,pl \ +; RUN: -r %t.bc,_linkoncefunc,pl \ +; RUN: -r %t.bc,_weakodrfunc,pl \ +; RUN: -r %t.bc,_weakfunc,pl \ +; RUN: -r %t.bc,_linkonceodrfuncInSingleModule,pl \ +; RUN: -r %t2.bc,_linkonceodrfuncwithalias,l \ +; RUN: -r %t2.bc,_linkoncefuncwithalias,l \ +; RUN: -r %t2.bc,_linkonceodrfunc,l \ +; RUN: -r %t2.bc,_linkoncefunc,l \ +; RUN: -r %t2.bc,_weakodrfunc,l \ +; RUN: -r %t2.bc,_weakfunc,l \ +; RUN: -r %t2.bc,_linkonceodralias,l \ +; RUN: -r %t2.bc,_linkoncealias,l +; RUN: llvm-dis %t3.out.1.2.internalize.bc -o - | FileCheck %s --check-prefix=MOD1-INT + target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" -; Alias are resolved, but can't be turned into "available_externally" +;; Alias are resolved, but can't be turned into "available_externally" ; MOD1: @linkonceodralias = weak_odr alias void (), ptr @linkonceodrfuncwithalias ; MOD2: @linkonceodralias = linkonce_odr alias void (), ptr @linkonceodrfuncwithalias @linkonceodralias = linkonce_odr alias void (), ptr @linkonceodrfuncwithalias -; Alias are resolved, but can't be turned into "available_externally" +;; Alias are resolved, but can't be turned into "available_externally" ; MOD1: @linkoncealias = weak alias void (), ptr @linkoncefuncwithalias ; MOD2: @linkoncealias = linkonce alias void (), ptr @linkoncefuncwithalias @linkoncealias = linkonce alias void (), ptr @linkoncefuncwithalias -; Function with an alias are resolved to weak_odr in prevailing module, but -; not optimized in non-prevailing module (illegal to have an -; available_externally aliasee). +;; Non-exported linkonce/weak variables can always be internalized, regardless +;; of whether they are const or *unnamed_addr. +; MOD1-INT: @linkonceodrvarInSingleModule = internal global +; MOD1-INT: @weakodrvarInSingleModule = internal global +@linkonceodrvarInSingleModule = linkonce_odr dso_local global ptr null, align 8 +@weakodrvarInSingleModule = weak_odr dso_local global ptr null, align 8 + +;; Function with an alias are resolved to weak_odr in prevailing module, but +;; not optimized in non-prevailing module (illegal to have an +;; available_externally aliasee). ; MOD1: define weak_odr void @linkonceodrfuncwithalias() ; MOD2: define linkonce_odr void @linkonceodrfuncwithalias() define linkonce_odr void @linkonceodrfuncwithalias() #0 { @@ -35,9 +68,9 @@ ret void } -; Function with an alias are resolved to weak in prevailing module, but -; not optimized in non-prevailing module (illegal to have an -; available_externally aliasee). +;; Function with an alias are resolved to weak in prevailing module, but +;; not optimized in non-prevailing module (illegal to have an +;; available_externally aliasee). ; MOD1: define weak void @linkoncefuncwithalias() ; MOD2: define linkonce void @linkoncefuncwithalias() define linkonce void @linkoncefuncwithalias() #0 { @@ -52,7 +85,8 @@ ret void } ; MOD1: define weak void @linkoncefunc() -; MOD1-INT: define weak void @linkoncefunc() +;; New LTO API will use dso_local +; MOD1-INT: define weak{{.*}} void @linkoncefunc() ; MOD2: declare void @linkoncefunc() define linkonce void @linkoncefunc() #0 { entry: @@ -71,6 +105,9 @@ ret void } +;; A linkonce_odr with a single, non-exported, def can be safely +;; internalized without increasing code size or being concerned +;; about affecting function pointer equality. ; MOD1: define weak_odr void @linkonceodrfuncInSingleModule() ; MOD1-INT: define internal void @linkonceodrfuncInSingleModule() ; EXPORTED: define weak_odr void @linkonceodrfuncInSingleModule()