diff --git a/lld/test/ELF/lto/Inputs/devirt_vcall_vis_shared_def.ll b/lld/test/ELF/lto/Inputs/devirt_vcall_vis_shared_def.ll new file mode 100644 --- /dev/null +++ b/lld/test/ELF/lto/Inputs/devirt_vcall_vis_shared_def.ll @@ -0,0 +1,19 @@ +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = type { i32 (...)** } + +@_ZTV1A = unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !vcall_visibility !1 + +define i32 @_ZN1A1fEi(%struct.A* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 { + ret i32 0; +} + +attributes #0 = { noinline optnone } + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 0} diff --git a/lld/test/ELF/lto/devirt_vcall_vis_shared_def.ll b/lld/test/ELF/lto/devirt_vcall_vis_shared_def.ll new file mode 100644 --- /dev/null +++ b/lld/test/ELF/lto/devirt_vcall_vis_shared_def.ll @@ -0,0 +1,102 @@ +; REQUIRES: x86 +;; Test that symbols defined in shared libraries prevent devirtualization. + +;; First check that we get devirtualization when the defs are in the +;; LTO unit. + +;; Index based WPD +;; Generate unsplit module with summary for ThinLTO index-based WPD. +; RUN: opt --thinlto-bc -o %t1a.o %s +; RUN: opt --thinlto-bc -o %t2a.o %S/Inputs/devirt_vcall_vis_shared_def.ll +; RUN: ld.lld %t1a.o %t2a.o -o %t3a -save-temps --lto-whole-program-visibility \ +; RUN: -mllvm -pass-remarks=. 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t1a.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +;; Hybrid WPD +;; Generate split module with summary for hybrid Thin/Regular LTO WPD. 
+; RUN: opt --thinlto-bc --thinlto-split-lto-unit -o %t1b.o %s +; RUN: opt --thinlto-bc --thinlto-split-lto-unit -o %t2b.o %S/Inputs/devirt_vcall_vis_shared_def.ll +; RUN: ld.lld %t1b.o %t2b.o -o %t3b -save-temps --lto-whole-program-visibility \ +; RUN: -mllvm -pass-remarks=. 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t1b.o.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +;; Regular LTO WPD +; RUN: opt -o %t1c.o %s +; RUN: opt -o %t2c.o %S/Inputs/devirt_vcall_vis_shared_def.ll +; RUN: ld.lld %t1c.o %t2c.o -o %t3c -save-temps --lto-whole-program-visibility \ +; RUN: -mllvm -pass-remarks=. 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t3c.0.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi + +;; Check that WPD fails when linking against a shared library +;; containing the strong defs of the vtables. +; RUN: ld.lld %t2c.o -o %t.so -shared + +;; Index based WPD +; RUN: ld.lld %t1a.o %t.so -o %t4a --lto-whole-program-visibility \ +; RUN: -mllvm -pass-remarks=. 2>&1 | count 0 + +;; Hybrid WPD +; RUN: ld.lld %t1b.o %t.so -o %t4b --lto-whole-program-visibility \ +; RUN: -mllvm -pass-remarks=. 2>&1 | count 0 + +;; Regular LTO WPD +; RUN: ld.lld %t1c.o %t.so -o %t4c --lto-whole-program-visibility \ +; RUN: -mllvm -pass-remarks=. 
2>&1 | count 0 + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } + +@_ZTV1A = available_externally unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !vcall_visibility !2 +@_ZTV1B = linkonce_odr unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1, !vcall_visibility !2 + +;; Prevent the vtables from being dead code eliminated. +@llvm.used = appending global [2 x i8*] [ i8* bitcast ( { [4 x i8*] }* @_ZTV1A to i8*), i8* bitcast ( { [4 x i8*] }* @_ZTV1B to i8*)] + +; CHECK-IR-LABEL: define dso_local i32 @_start +define i32 @_start(%struct.A* %obj, i32 %a) { +entry: + %0 = bitcast %struct.A* %obj to i8*** + %vtable = load i8**, i8*** %0 + %1 = bitcast i8** %vtable to i8* + %p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A") + call void @llvm.assume(i1 %p) + %fptrptr = getelementptr i8*, i8** %vtable, i32 1 + %2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)** + %fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8 + + ;; Check that the call was devirtualized. 
+ ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi + ; CHECK-NODEVIRT-IR: %call = tail call i32 %fptr1 + %call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a) + + ret i32 %call +} +; CHECK-IR-LABEL: ret i32 +; CHECK-IR-LABEL: } + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) + +define available_externally i32 @_ZN1A1fEi(%struct.A* %this, i32 %a) #0 { + ret i32 0 +} + +define available_externally i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 { + ret i32 0 +} + +define linkonce_odr i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 { + ret i32 0 +} + +;; Make sure we don't inline or otherwise optimize out the direct calls. +attributes #0 = { noinline optnone } + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTS1B"} +!2 = !{i64 0} diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp --- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -275,6 +275,11 @@ ValueToValueMapTy VMap; std::unique_ptr MergedM( CloneModule(M, VMap, [&](const GlobalValue *GV) -> bool { + // Clone any llvm.*used globals to ensure the included values are + // not deleted. + if (GV->getName() == "llvm.used" || + GV->getName() == "llvm.compiler.used") + return true; if (const auto *C = GV->getComdat()) if (MergedMComdats.count(C)) return true; diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -994,10 +994,10 @@ std::vector &TargetsForSlot, const TypeIdCompatibleVtableInfo TIdInfo, uint64_t ByteOffset) { for (const TypeIdOffsetVtableInfo &P : TIdInfo) { - // Find the first non-available_externally linkage vtable initializer. + // Find a representative copy of the vtable initializer. 
// We can have multiple available_externally, linkonce_odr and weak_odr - // vtable initializers, however we want to skip available_externally as they - // do not have type metadata attached, and therefore the summary will not + // vtable initializers, however currently clang does not attach type + // metadata to available_externally, and therefore the summary will not // contain any vtable functions. We can also have multiple external // vtable initializers in the case of comdats, which we cannot check here. // The linker should give an error in this case. @@ -1014,14 +1014,22 @@ return false; LocalFound = true; } - if (!GlobalValue::isAvailableExternallyLinkage(S->linkage())) { - VS = cast(S->getBaseObject()); + auto *CurVS = cast(S->getBaseObject()); + if (!CurVS->vTableFuncs().empty()) { + VS = CurVS; // We cannot perform whole program devirtualization analysis on a vtable // with public LTO visibility. if (VS->getVCallVisibility() == GlobalObject::VCallVisibilityPublic) return false; - } + } else + // Currently clang will not attach the necessary type metadata to + // available_externally vtables. + assert(GlobalValue::isAvailableExternallyLinkage(S->linkage())); } + // There will be no VS if all copies are available_externally having no + // type metadata. In that case we can't safely perform WPD. + if (!VS) + return false; if (!VS->isLive()) continue; for (auto VTP : VS->vTableFuncs()) {