Index: lib/Transforms/IPO/WholeProgramDevirt.cpp =================================================================== --- lib/Transforms/IPO/WholeProgramDevirt.cpp +++ lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -383,11 +383,6 @@ !SummaryTypeCheckedLoadUsers.empty(); } - void markSummaryHasTypeTestAssumeUsers() { - SummaryHasTypeTestAssumeUsers = true; - AllCallSitesDevirted = false; - } - void addSummaryTypeCheckedLoadUser(FunctionSummary *FS) { SummaryTypeCheckedLoadUsers.push_back(FS); AllCallSitesDevirted = false; @@ -395,7 +390,8 @@ void addSummaryTypeTestAssumeUser(FunctionSummary *FS) { SummaryTypeTestAssumeUsers.push_back(FS); - markSummaryHasTypeTestAssumeUsers(); + SummaryHasTypeTestAssumeUsers = true; + AllCallSitesDevirted = false; } void markDevirt() { @@ -504,7 +500,8 @@ void applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn, bool &IsExported); - bool trySingleImplDevirt(MutableArrayRef TargetsForSlot, + bool trySingleImplDevirt(ModuleSummaryIndex *ExportSummary, + MutableArrayRef TargetsForSlot, VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res); @@ -923,9 +920,38 @@ Apply(P.second); } +static bool AddCalls(VTableSlotInfo &SlotInfo, const ValueInfo &Callee) { + // We can't add calls if we haven't seen a definition + if (Callee.getSummaryList().empty()) + return false; + + // Insert calls into the summary index so that the devirtualized targets + // are eligible for import. + // FIXME: Annotate type tests with hotness. For now, mark these as hot + // to better ensure we have the opportunity to inline them. + bool IsExported = false; + auto &S = Callee.getSummaryList()[0]; + CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0); + auto AddCalls = [&](CallSiteInfo &CSInfo) { + for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) { + FS->addCall({Callee, CI}); + IsExported |= S->modulePath() != FS->modulePath(); + } + for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) { + FS->addCall({Callee, CI}); + IsExported |= S->modulePath() != FS->modulePath(); + } + }; + AddCalls(SlotInfo.CSInfo); + for (auto &P : SlotInfo.ConstCSInfo) + AddCalls(P.second); + return IsExported; +} + bool DevirtModule::trySingleImplDevirt( - MutableArrayRef TargetsForSlot, - VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res) { + ModuleSummaryIndex *ExportSummary, + MutableArrayRef TargetsForSlot, VTableSlotInfo &SlotInfo, + WholeProgramDevirtResolution *Res) { // See if the program contains a single implementation of this virtual // function. Function *TheFn = TargetsForSlot[0].Fn; @@ -965,6 +991,10 @@ TheFn->setVisibility(GlobalValue::HiddenVisibility); TheFn->setName(NewName); } + if (ValueInfo TheFnVI = ExportSummary->getValueInfo(TheFn->getGUID())) + // Any needed promotion of 'TheFn' has already been done during + // LTO unit split, so we can ignore return value of AddCalls. + AddCalls(SlotInfo, TheFnVI); Res->TheKind = WholeProgramDevirtResolution::SingleImpl; Res->SingleImplName = TheFn->getName(); @@ -1000,27 +1030,7 @@ DevirtTargets.insert(TheFn); auto &S = TheFn.getSummaryList()[0]; - bool IsExported = false; - - // Insert calls into the summary index so that the devirtualized targets - // are eligible for import. - // FIXME: Annotate type tests with hotness. For now, mark these as hot - // to better ensure we have the opportunity to inline them. - CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0); - auto AddCalls = [&](CallSiteInfo &CSInfo) { - for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) { - FS->addCall({TheFn, CI}); - IsExported |= S->modulePath() != FS->modulePath(); - } - for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) { - FS->addCall({TheFn, CI}); - IsExported |= S->modulePath() != FS->modulePath(); - } - }; - AddCalls(SlotInfo.CSInfo); - for (auto &P : SlotInfo.ConstCSInfo) - AddCalls(P.second); - + bool IsExported = AddCalls(SlotInfo, TheFn); if (IsExported) ExportedGUIDs.insert(TheFn.getGUID()); @@ -1846,8 +1856,7 @@ // FIXME: Only add live functions. for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) { for (Metadata *MD : MetadataByGUID[VF.GUID]) { - CallSlots[{MD, VF.Offset}] - .CSInfo.markSummaryHasTypeTestAssumeUsers(); + CallSlots[{MD, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS); } } for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) { @@ -1860,7 +1869,7 @@ for (Metadata *MD : MetadataByGUID[VC.VFunc.GUID]) { CallSlots[{MD, VC.VFunc.Offset}] .ConstCSInfo[VC.Args] - .markSummaryHasTypeTestAssumeUsers(); + .addSummaryTypeTestAssumeUser(FS); } } for (const FunctionSummary::ConstVCall &VC : @@ -1892,7 +1901,7 @@ cast(S.first.TypeID)->getString()) .WPDRes[S.first.ByteOffset]; - if (!trySingleImplDevirt(TargetsForSlot, S.second, Res)) { + if (!trySingleImplDevirt(ExportSummary, TargetsForSlot, S.second, Res)) { DidVirtualConstProp |= tryVirtualConstProp(TargetsForSlot, S.second, Res, S.first); Index: test/ThinLTO/X86/Inputs/devirt_single_hybrid_bar.ll =================================================================== --- test/ThinLTO/X86/Inputs/devirt_single_hybrid_bar.ll +++ test/ThinLTO/X86/Inputs/devirt_single_hybrid_bar.ll @@ -0,0 +1,58 @@ +; ModuleID = 'bar.cpp' +source_filename = "bar.cpp" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.A = type { i32 (...)** } + +$_ZNK1A1fEv = comdat any + +$_ZTV1A = comdat any + +$_ZTS1A = comdat any + +$_ZTI1A = comdat any + +@_ZTV1A = linkonce_odr hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI1A to i8*), i8* bitcast (i32 (%struct.A*)* @_ZNK1A1fEv to i8*)] }, comdat, align 8, !type !0, !type !1 +@_ZTVN10__cxxabiv117__class_type_infoE = external dso_local global i8* +@_ZTS1A = linkonce_odr hidden constant [3 x i8] c"1A\00", comdat, align 1 +@_ZTI1A = linkonce_odr hidden constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1A, i32 0, i32 0) }, comdat, align 8 + +; Function Attrs: uwtable +define hidden i32 @_Z3barv() local_unnamed_addr #0 { +entry: + %b = alloca %struct.A, align 8 + %0 = bitcast %struct.A* %b to i8* + call void @llvm.lifetime.start.p0i8(i64 8, i8* nonnull %0) + %1 = getelementptr inbounds %struct.A, %struct.A* %b, i64 0, i32 0 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV1A, i64 0, inrange i32 0, i64 2) to i32 (...)**), i32 (...)*** %1, align 8, !tbaa !4 + %call = call i32 @_Z3fooP1A(%struct.A* nonnull %b) + %add = add nsw i32 %call, 10 + call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0) #4 + ret i32 %add +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) + +declare dso_local i32 @_Z3fooP1A(%struct.A*) local_unnamed_addr + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) + +; Function Attrs: nounwind uwtable +define linkonce_odr hidden i32 @_ZNK1A1fEv(%struct.A* %this) unnamed_addr comdat align 2 { +entry: + ret i32 3 +} + +!llvm.module.flags = !{!2} +!llvm.ident = !{!3} + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTSM1AKFivE.virtual"} +!2 = !{i32 1, !"wchar_size", i32 4} +!3 = !{!"clang version 10.0.0 (trunk 373596)"} +!4 = !{!5, !5, i64 0} +!5 = !{!"vtable pointer", !6, i64 0} +!6 = !{!"Simple C++ TBAA"} Index: test/ThinLTO/X86/Inputs/devirt_single_hybrid_foo.ll =================================================================== --- test/ThinLTO/X86/Inputs/devirt_single_hybrid_foo.ll +++ test/ThinLTO/X86/Inputs/devirt_single_hybrid_foo.ll @@ -0,0 +1,35 @@ +; ModuleID = 'foo.cpp' +source_filename = "foo.cpp" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.A = type { i32 (...)** } + +; Function Attrs: uwtable +define hidden i32 @_Z3fooP1A(%struct.A* %pA) local_unnamed_addr { +entry: + %0 = bitcast %struct.A* %pA to i32 (%struct.A*)*** + %vtable = load i32 (%struct.A*)**, i32 (%struct.A*)*** %0, align 8, !tbaa !2 + %1 = bitcast i32 (%struct.A*)** %vtable to i8* + %2 = tail call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A") + tail call void @llvm.assume(i1 %2) + %3 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable, align 8 + %call = tail call i32 %3(%struct.A* %pA) + %add = add nsw i32 %call, 10 + ret i32 %add +} + +; Function Attrs: nounwind readnone willreturn +declare i1 @llvm.type.test(i8*, metadata) + +; Function Attrs: nounwind willreturn +declare void @llvm.assume(i1) + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 10.0.0 (trunk 373596)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"vtable pointer", !4, i64 0} +!4 = !{!"Simple C++ TBAA"} Index: test/ThinLTO/X86/devirt_single_hybrid.ll =================================================================== --- test/ThinLTO/X86/devirt_single_hybrid.ll +++ test/ThinLTO/X86/devirt_single_hybrid.ll @@ -0,0 +1,55 @@ +; Check that we import and inline virtual method with single implementation +; when we're running hybrid LTO. +; +; RUN: opt -thinlto-bc -thinlto-split-lto-unit %s -o %t-main.bc +; RUN: opt -thinlto-bc -thinlto-split-lto-unit %p/Inputs/devirt_single_hybrid_foo.ll -o %t-foo.bc +; RUN: opt -thinlto-bc -thinlto-split-lto-unit %p/Inputs/devirt_single_hybrid_bar.ll -o %t-bar.bc +; RUN: llvm-lto2 run -save-temps %t-main.bc %t-foo.bc %t-bar.bc -pass-remarks=. -o %t \ +; RUN: -r=%t-foo.bc,_Z3fooP1A,pl \ +; RUN: -r=%t-main.bc,main,plx \ +; RUN: -r=%t-main.bc,_Z3barv,l \ +; RUN: -r=%t-bar.bc,_Z3barv,pl \ +; RUN: -r=%t-bar.bc,_Z3fooP1A, \ +; RUN: -r=%t-bar.bc,_ZNK1A1fEv,pl \ +; RUN: -r=%t-bar.bc,_ZTV1A,l \ +; RUN: -r=%t-bar.bc,_ZTVN10__cxxabiv117__class_type_infoE, \ +; RUN: -r=%t-bar.bc,_ZTS1A,pl \ +; RUN: -r=%t-bar.bc,_ZTI1A,pl \ +; RUN: -r=%t-bar.bc,_ZNK1A1fEv, \ +; RUN: -r=%t-bar.bc,_ZTV1A,pl \ +; RUN: -r=%t-bar.bc,_ZTI1A, 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t.1.3.import.bc -o - | FileCheck %s --check-prefix=IMPORT +; RUN: llvm-dis %t.1.5.precodegen.bc -o - | FileCheck %s --check-prefix=CODEGEN + +; REMARK-COUNT-3: single-impl: devirtualized a call to _ZNK1A1fEv + +; IMPORT: define available_externally hidden i32 @_ZNK1A1fEv(%struct.A* %this) +; IMPORT-NEXT: entry: +; IMPORT-NEXT: ret i32 3 + +; CODEGEN: define hidden i32 @main() +; CODEGEN-NEXT: entry: +; CODEGEN-NEXT: ret i32 23 + +; Virtual method should have been optimized out +; CODEGEN-NOT: _ZNK1A1fEv + +; ModuleID = 'main.cpp' +source_filename = "main.cpp" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: norecurse uwtable +define hidden i32 @main() local_unnamed_addr { +entry: + %call = tail call i32 @_Z3barv() + ret i32 %call +} + +declare dso_local i32 @_Z3barv() local_unnamed_addr + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 10.0.0 (trunk 373596)"}