diff --git a/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll b/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll --- a/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll +++ b/clang/test/CodeGen/thinlto-distributed-cfi-devirt.ll @@ -85,8 +85,8 @@ %4 = bitcast i8* %3 to i32 (%struct.A*, i32)* ; Check that the call was devirtualized. - ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi - ; SKIP-IR-NOT: %call = tail call i32 @_ZN1A1nEi + ; CHECK-IR: %call = tail call i32 bitcast (void ()* @_ZN1A1nEi + ; SKIP-IR-NOT: %call = tail call i32 {{.*}}@_ZN1A1nEi %call = tail call i32 %4(%struct.A* nonnull %obj, i32 %a) %vtable16 = load i8*, i8** %0 %5 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable16, i32 0, metadata !"_ZTS1A") diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -543,20 +543,30 @@ static void dropDeadSymbols(Module &Mod, const GVSummaryMapTy &DefinedGlobals, const ModuleSummaryIndex &Index) { - std::vector DeadGVs; - for (auto &GV : Mod.global_values()) - if (GlobalValueSummary *GVS = DefinedGlobals.lookup(GV.getGUID())) + std::vector Declarations; + for (auto &GV : Mod.global_values()) { + if (GlobalValueSummary *GVS = DefinedGlobals.lookup(GV.getGUID())) { if (!Index.isGlobalValueLive(GVS)) { - DeadGVs.push_back(&GV); + Declarations.push_back(&GV); convertToDeclaration(GV); } + } else if (GV.isDeclaration()) + // Check if any pre-existing declarations are unused after dead + // definitions are removed. + Declarations.push_back(&GV); + } - // Now that all dead bodies have been dropped, delete the actual objects - // themselves when possible. - for (GlobalValue *GV : DeadGVs) { + // Now that all dead bodies have been dropped, delete the declarations + // themselves when possible. Note this is important to avoid introducing + // calls to these functions during indirect call promotion, e.g. when using a + // sample profile from a different binary or different version of the source, + // that may reference virtual functions not live in this binary. + for (GlobalValue *GV : Declarations) { GV->removeDeadConstantUsers(); - // Might reference something defined in native object (i.e. dropped a - // non-prevailing IR def, but we need to keep the declaration). + // Might have a reference to something defined in native object + // (i.e. dropped a non-prevailing IR def), so we need to keep the + // declaration. In this case there is a definition somewhere, so + // keeping the declaration should not cause any issues. if (GV->use_empty()) GV->eraseFromParent(); } diff --git a/llvm/test/LTO/Resolution/X86/load-sample-prof-icp.ll b/llvm/test/LTO/Resolution/X86/load-sample-prof-icp.ll --- a/llvm/test/LTO/Resolution/X86/load-sample-prof-icp.ll +++ b/llvm/test/LTO/Resolution/X86/load-sample-prof-icp.ll @@ -23,6 +23,11 @@ %3 = load void ()*, void ()** %2 ; CHECK: call void @bar(),{{.*}}!prof call void %3(), !dbg !10 + ; Call bar directly so we don't eliminate the unused declaration before ICP, + ; which will block it. + ; CHECK: if.end.icp: + ; CHECK-NEXT: call void @bar() + call void @bar() ret void } diff --git a/llvm/test/ThinLTO/X86/cfi-devirt.ll b/llvm/test/ThinLTO/X86/cfi-devirt.ll --- a/llvm/test/ThinLTO/X86/cfi-devirt.ll +++ b/llvm/test/ThinLTO/X86/cfi-devirt.ll @@ -97,7 +97,7 @@ %4 = bitcast i8* %3 to i32 (%struct.A*, i32)* ; Check that the call was devirtualized. - ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi + ; CHECK-IR: %call = tail call i32 bitcast (void ()* @_ZN1A1nEi %call = tail call i32 %4(%struct.A* nonnull %obj, i32 %a) %vtable16 = load i8*, i8** %0 %5 = tail call { i8*, i1 } @llvm.type.checked.load(i8* %vtable16, i32 0, metadata !"_ZTS1A") diff --git a/llvm/test/ThinLTO/X86/devirt-after-icp.ll b/llvm/test/ThinLTO/X86/devirt-after-icp.ll --- a/llvm/test/ThinLTO/X86/devirt-after-icp.ll +++ b/llvm/test/ThinLTO/X86/devirt-after-icp.ll @@ -120,7 +120,7 @@ %8 = load i32 (%class.B*)*, i32 (%class.B*)** %vfn.i, align 8 ; Call to bar() can be devirtualized to call to B::bar(), since it was ; inlined from B::foo() after ICP introduced the guarded promotion. -; CHECK-IR: %call.i = tail call i32 @_ZN1B3barEv(%class.B* %3) +; CHECK-IR: %call.i = tail call i32 bitcast (void ()* @_ZN1B3barEv to i32 (%class.B*)*)(%class.B* %3) %call.i = tail call i32 %8(%class.B* %5) br label %if.end.icp diff --git a/llvm/test/ThinLTO/X86/index-const-prop-alias.ll b/llvm/test/ThinLTO/X86/index-const-prop-alias.ll --- a/llvm/test/ThinLTO/X86/index-const-prop-alias.ll +++ b/llvm/test/ThinLTO/X86/index-const-prop-alias.ll @@ -15,16 +15,13 @@ ; RUN: %t2.bc -r=%t2.bc,g,pl -r=%t2.bc,g.alias,plx -save-temps -o %t5 ; RUN: llvm-dis %t5.1.3.import.bc -o - | FileCheck %s --check-prefix=PRESERVED -; We currently don't support importing aliases -; IMPORT: @g.alias = external global i32 -; IMPORT-NEXT: @g = internal global i32 42, align 4 #0 +; IMPORT: @g = internal global i32 42, align 4 #0 ; IMPORT: attributes #0 = { "thinlto-internalize" } ; CODEGEN: define dso_local i32 @main ; CODEGEN-NEXT: ret i32 42 -; PRESERVED: @g.alias = external global i32 -; PRESERVED-NEXT: @g = available_externally global i32 42, align 4 +; PRESERVED: @g = available_externally global i32 42, align 4 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/ThinLTO/X86/index-const-prop-dead.ll b/llvm/test/ThinLTO/X86/index-const-prop-dead.ll --- a/llvm/test/ThinLTO/X86/index-const-prop-dead.ll +++ b/llvm/test/ThinLTO/X86/index-const-prop-dead.ll @@ -4,9 +4,9 @@ ; RUN: %t1.bc -r=%t1.bc,main,plx -r=%t1.bc,foo,pl -r=%t1.bc,g, -o %t3 ; RUN: llvm-dis %t3.2.3.import.bc -o - | FileCheck %s -; Dead globals are converted to declarations by ThinLTO in dropDeadSymbols -; If we try to internalize such we'll get a broken module. -; CHECK: @g = external global i32 +; Dead globals are converted to declarations by ThinLTO, and unused declarations +; are removed, in dropDeadSymbols, +; CHECK-NOT: @g target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu"