Index: llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h =================================================================== --- llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h +++ llvm/trunk/include/llvm/IR/ModuleSummaryIndex.h @@ -632,6 +632,8 @@ /// Return the list of pairs. ArrayRef calls() const { return CallGraphEdgeList; } + void addCall(EdgeTy E) { CallGraphEdgeList.push_back(E); } + /// Returns the list of type identifiers used by this function in /// llvm.type.test intrinsics other than by an llvm.assume intrinsic, /// represented as GUIDs. @@ -1293,6 +1295,12 @@ return nullptr; } + TypeIdSummary *getTypeIdSummary(StringRef TypeId) { + return const_cast( + static_cast(this)->getTypeIdSummary( + TypeId)); + } + const std::map & typeIdCompatibleVtableMap() const { return TypeIdCompatibleVtableMap; Index: llvm/trunk/include/llvm/Transforms/IPO/WholeProgramDevirt.h =================================================================== --- llvm/trunk/include/llvm/Transforms/IPO/WholeProgramDevirt.h +++ llvm/trunk/include/llvm/Transforms/IPO/WholeProgramDevirt.h @@ -16,8 +16,10 @@ #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/Transforms/IPO/FunctionImport.h" #include #include +#include #include #include @@ -28,6 +30,7 @@ class Function; class GlobalVariable; class ModuleSummaryIndex; +struct ValueInfo; namespace wholeprogramdevirt { @@ -228,6 +231,29 @@ PreservedAnalyses run(Module &M, ModuleAnalysisManager &); }; +struct VTableSlotSummary { + StringRef TypeID; + uint64_t ByteOffset; +}; + +/// Perform index-based whole program devirtualization on the \p Summary +/// index. Any devirtualized targets used by a type test in another module +/// are added to the \p ExportedGUIDs set. 
For any local devirtualized targets +/// only used within the defining module, the information necessary for +/// locating the corresponding WPD resolution is recorded for the ValueInfo +/// in case it is exported by cross module importing (in which case the +/// devirtualized target name will need adjustment). +void runWholeProgramDevirtOnIndex( + ModuleSummaryIndex &Summary, std::set &ExportedGUIDs, + std::map> &LocalWPDTargetsMap); + +/// Call after cross-module importing to update the recorded single impl +/// devirt target names for any locals that were exported. +void updateIndexWPDForExports( + ModuleSummaryIndex &Summary, + StringMap &ExportLists, + std::map> &LocalWPDTargetsMap); + } // end namespace llvm #endif // LLVM_TRANSFORMS_IPO_WHOLEPROGRAMDEVIRT_H Index: llvm/trunk/lib/LTO/LTO.cpp =================================================================== --- llvm/trunk/lib/LTO/LTO.cpp +++ llvm/trunk/lib/LTO/LTO.cpp @@ -44,6 +44,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" #include "llvm/Transforms/Utils/SplitModule.h" @@ -1274,15 +1275,28 @@ if (DumpThinCGSCCs) ThinLTO.CombinedIndex.dumpSCCs(outs()); + std::set ExportedGUIDs; + + // Perform index-based WPD. This will return immediately if there are + // no index entries in the typeIdMetadata map (e.g. if we are instead + // performing IR-based WPD in hybrid regular/thin LTO mode). 
+ std::map> LocalWPDTargetsMap; + runWholeProgramDevirtOnIndex(ThinLTO.CombinedIndex, ExportedGUIDs, + LocalWPDTargetsMap); + if (Conf.OptLevel > 0) ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, ImportLists, ExportLists); + // Update local devirtualized targets that were exported by cross-module + // importing + updateIndexWPDForExports(ThinLTO.CombinedIndex, ExportLists, + LocalWPDTargetsMap); + // Figure out which symbols need to be internalized. This also needs to happen // at -O0 because summary-based DCE is implemented using internalization, and // we must apply DCE consistently with the full LTO module in order to avoid // undefined references during the final link. - std::set ExportedGUIDs; for (auto &Res : GlobalResolutions) { // If the symbol does not have external references or it is not prevailing, // then not need to mark it as exported from a ThinLTO partition. Index: llvm/trunk/lib/Transforms/IPO/WholeProgramDevirt.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ llvm/trunk/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -24,12 +24,14 @@ // returns 0, or a single vtable's function returns 1, replace each virtual // call with a comparison of the vptr against that vtable's address. // -// This pass is intended to be used during the regular and thin LTO pipelines. +// This pass is intended to be used during the regular and thin LTO pipelines: +// // During regular LTO, the pass determines the best optimization for each // virtual call and applies the resolutions directly to virtual calls that are // eligible for virtual call optimization (i.e. calls that use either of the -// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). During -// ThinLTO, the pass operates in two phases: +// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). 
+// +// During hybrid Regular/ThinLTO, the pass operates in two phases: // - Export phase: this is run during the thin link over a single merged module // that contains all vtables with !type metadata that participate in the link. // The pass computes a resolution for each virtual call and stores it in the @@ -38,6 +40,14 @@ // modules. The pass applies the resolutions previously computed during the // import phase to each eligible virtual call. // +// During ThinLTO, the pass operates in two phases: +// - Export phase: this is run during the thin link over the index which +// contains a summary of all vtables with !type metadata that participate in +// the link. It computes a resolution for each virtual call and stores it in +// the type identifier summary. Only single implementation devirtualization +// is supported. +// - Import phase: (same as with hybrid case above). +// //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/WholeProgramDevirt.h" @@ -117,6 +127,11 @@ cl::desc("Maximum number of call targets per " "call site to enable branch funnels")); +static cl::opt + PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden, + cl::init(false), cl::ZeroOrMore, + cl::desc("Print index-based devirtualization messages")); + // Find the minimum offset that we may store a value of size Size bits at. If // IsAfter is set, look for an offset before the object, otherwise look for an // offset after the object. 
@@ -265,6 +280,25 @@ } }; +template <> struct DenseMapInfo { + static VTableSlotSummary getEmptyKey() { + return {DenseMapInfo::getEmptyKey(), + DenseMapInfo::getEmptyKey()}; + } + static VTableSlotSummary getTombstoneKey() { + return {DenseMapInfo::getTombstoneKey(), + DenseMapInfo::getTombstoneKey()}; + } + static unsigned getHashValue(const VTableSlotSummary &I) { + return DenseMapInfo::getHashValue(I.TypeID) ^ + DenseMapInfo::getHashValue(I.ByteOffset); + } + static bool isEqual(const VTableSlotSummary &LHS, + const VTableSlotSummary &RHS) { + return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset; + } +}; + } // end namespace llvm namespace { @@ -342,6 +376,7 @@ /// pass the vector is non-empty, we will need to add a use of llvm.type.test /// to each of the function summaries in the vector. std::vector SummaryTypeCheckedLoadUsers; + std::vector SummaryTypeTestAssumeUsers; bool isExported() const { return SummaryHasTypeTestAssumeUsers || @@ -358,6 +393,11 @@ AllCallSitesDevirted = false; } + void addSummaryTypeTestAssumeUser(FunctionSummary *FS) { + SummaryTypeTestAssumeUsers.push_back(FS); + markSummaryHasTypeTestAssumeUsers(); + } + void markDevirt() { AllCallSitesDevirted = true; @@ -542,6 +582,38 @@ function_ref LookupDomTree); }; +struct DevirtIndex { + ModuleSummaryIndex &ExportSummary; + // The set in which to record GUIDs exported from their module by + // devirtualization, used by client to ensure they are not internalized. + std::set &ExportedGUIDs; + // A map in which to record the information necessary to locate the WPD + // resolution for local targets in case they are exported by cross module + // importing. 
+ std::map> &LocalWPDTargetsMap; + + MapVector CallSlots; + + DevirtIndex( + ModuleSummaryIndex &ExportSummary, + std::set &ExportedGUIDs, + std::map> &LocalWPDTargetsMap) + : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs), + LocalWPDTargetsMap(LocalWPDTargetsMap) {} + + bool tryFindVirtualCallTargets(std::vector &TargetsForSlot, + const TypeIdCompatibleVtableInfo TIdInfo, + uint64_t ByteOffset); + + bool trySingleImplDevirt(MutableArrayRef TargetsForSlot, + VTableSlotSummary &SlotSummary, + VTableSlotInfo &SlotInfo, + WholeProgramDevirtResolution *Res, + std::set &DevirtTargets); + + void run(); +}; + struct WholeProgramDevirt : public ModulePass { static char ID; @@ -632,6 +704,43 @@ return PreservedAnalyses::none(); } +namespace llvm { +void runWholeProgramDevirtOnIndex( + ModuleSummaryIndex &Summary, std::set &ExportedGUIDs, + std::map> &LocalWPDTargetsMap) { + DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run(); +} + +void updateIndexWPDForExports( + ModuleSummaryIndex &Summary, + StringMap &ExportLists, + std::map> &LocalWPDTargetsMap) { + for (auto &T : LocalWPDTargetsMap) { + auto &VI = T.first; + // This was enforced earlier during trySingleImplDevirt. + assert(VI.getSummaryList().size() == 1 && + "Devirt of local target has more than one copy"); + auto &S = VI.getSummaryList()[0]; + const auto &ExportList = ExportLists.find(S->modulePath()); + if (ExportList == ExportLists.end() || + !ExportList->second.count(VI.getGUID())) + continue; + + // It's been exported by a cross module import. 
+ for (auto &SlotSummary : T.second) { + auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID); + assert(TIdSum); + auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset); + assert(WPDRes != TIdSum->WPDRes.end()); + WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal( + WPDRes->second.SingleImplName, + Summary.getModuleHash(S->modulePath())); + } + } +} + +} // end namespace llvm + bool DevirtModule::runForTesting( Module &M, function_ref AARGetter, function_ref OREGetter, @@ -766,6 +875,34 @@ return !TargetsForSlot.empty(); } +bool DevirtIndex::tryFindVirtualCallTargets( + std::vector &TargetsForSlot, const TypeIdCompatibleVtableInfo TIdInfo, + uint64_t ByteOffset) { + for (const TypeIdOffsetVtableInfo P : TIdInfo) { + // VTable initializer should have only one summary, or all copies must be + // linkonce/weak ODR. + assert(P.VTableVI.getSummaryList().size() == 1 || + llvm::all_of( + P.VTableVI.getSummaryList(), + [&](const std::unique_ptr &Summary) { + return GlobalValue::isLinkOnceODRLinkage(Summary->linkage()) || + GlobalValue::isWeakODRLinkage(Summary->linkage()); + })); + const auto *VS = cast(P.VTableVI.getSummaryList()[0].get()); + if (!P.VTableVI.getSummaryList()[0]->isLive()) + continue; + for (auto VTP : VS->vTableFuncs()) { + if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset) + continue; + + TargetsForSlot.push_back(VTP.FuncVI); + } + } + + // Give up if we couldn't find any targets. + return !TargetsForSlot.empty(); +} + void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn, bool &IsExported) { auto Apply = [&](CallSiteInfo &CSInfo) { @@ -837,6 +974,83 @@ return true; } +bool DevirtIndex::trySingleImplDevirt(MutableArrayRef TargetsForSlot, + VTableSlotSummary &SlotSummary, + VTableSlotInfo &SlotInfo, + WholeProgramDevirtResolution *Res, + std::set &DevirtTargets) { + // See if the program contains a single implementation of this virtual + // function. 
+ auto TheFn = TargetsForSlot[0]; + for (auto &&Target : TargetsForSlot) + if (TheFn != Target) + return false; + + // Don't devirtualize if we don't have target definition. + auto Size = TheFn.getSummaryList().size(); + if (!Size) + return false; + + // If the summary list contains multiple summaries where at least one is + // a local, give up, as we won't know which (possibly promoted) name to use. + for (auto &S : TheFn.getSummaryList()) + if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1) + return false; + + // Collect functions devirtualized at least for one call site for stats. + if (PrintSummaryDevirt) + DevirtTargets.insert(TheFn); + + auto &S = TheFn.getSummaryList()[0]; + bool IsExported = false; + + // Insert calls into the summary index so that the devirtualized targets + // are eligible for import. + // FIXME: Annotate type tests with hotness. For now, mark these as hot + // to better ensure we have the opportunity to inline them. + CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0); + auto AddCalls = [&](CallSiteInfo &CSInfo) { + for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) { + FS->addCall({TheFn, CI}); + IsExported |= S->modulePath() != FS->modulePath(); + } + for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) { + FS->addCall({TheFn, CI}); + IsExported |= S->modulePath() != FS->modulePath(); + } + }; + AddCalls(SlotInfo.CSInfo); + for (auto &P : SlotInfo.ConstCSInfo) + AddCalls(P.second); + + if (IsExported) + ExportedGUIDs.insert(TheFn.getGUID()); + + // Record in summary for use in devirtualization during the ThinLTO import + // step. + Res->TheKind = WholeProgramDevirtResolution::SingleImpl; + if (GlobalValue::isLocalLinkage(S->linkage())) { + if (IsExported) + // If target is a local function and we are exporting it by + // devirtualizing a call in another module, we need to record the + // promoted name. 
+ Res->SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal( + TheFn.name(), ExportSummary.getModuleHash(S->modulePath())); + else { + LocalWPDTargetsMap[TheFn].push_back(SlotSummary); + Res->SingleImplName = TheFn.name(); + } + } else + Res->SingleImplName = TheFn.name(); + + // Name will be empty if this thin link driven off of serialized combined + // index (e.g. llvm-lto). However, WPD is not supported/invoked for the + // legacy LTO API anyway. + assert(!Res->SingleImplName.empty()); + + return true; +} + void DevirtModule::tryICallBranchFunnel( MutableArrayRef TargetsForSlot, VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res, VTableSlot Slot) { @@ -1486,8 +1700,11 @@ } void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) { + auto *TypeId = dyn_cast(Slot.TypeID); + if (!TypeId) + return; const TypeIdSummary *TidSummary = - ImportSummary->getTypeIdSummary(cast(Slot.TypeID)->getString()); + ImportSummary->getTypeIdSummary(TypeId->getString()); if (!TidSummary) return; auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset); @@ -1496,6 +1713,7 @@ const WholeProgramDevirtResolution &Res = ResI->second; if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) { + assert(!Res.SingleImplName.empty()); // The type of the function in the declaration is irrelevant because every // call site will cast it to the correct type. Constant *SingleImpl = @@ -1713,7 +1931,7 @@ using namespace ore; OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F) << "devirtualized " - << NV("FunctionName", F->getName())); + << NV("FunctionName", DT.first)); } } @@ -1727,3 +1945,78 @@ return true; } + +void DevirtIndex::run() { + if (ExportSummary.typeIdCompatibleVtableMap().empty()) + return; + + DenseMap> NameByGUID; + for (auto &P : ExportSummary.typeIdCompatibleVtableMap()) { + NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first); + } + + // Collect information from summary about which calls to try to devirtualize. 
+ for (auto &P : ExportSummary) { + for (auto &S : P.second.SummaryList) { + auto *FS = dyn_cast(S.get()); + if (!FS) + continue; + // FIXME: Only add live functions. + for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) { + for (StringRef Name : NameByGUID[VF.GUID]) { + CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS); + } + } + for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) { + for (StringRef Name : NameByGUID[VF.GUID]) { + CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS); + } + } + for (const FunctionSummary::ConstVCall &VC : + FS->type_test_assume_const_vcalls()) { + for (StringRef Name : NameByGUID[VC.VFunc.GUID]) { + CallSlots[{Name, VC.VFunc.Offset}] + .ConstCSInfo[VC.Args] + .addSummaryTypeTestAssumeUser(FS); + } + } + for (const FunctionSummary::ConstVCall &VC : + FS->type_checked_load_const_vcalls()) { + for (StringRef Name : NameByGUID[VC.VFunc.GUID]) { + CallSlots[{Name, VC.VFunc.Offset}] + .ConstCSInfo[VC.Args] + .addSummaryTypeCheckedLoadUser(FS); + } + } + } + } + + std::set DevirtTargets; + // For each (type, offset) pair: + for (auto &S : CallSlots) { + // Search each of the members of the type identifier for the virtual + // function implementation at offset S.first.ByteOffset, and add to + // TargetsForSlot. + std::vector TargetsForSlot; + auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID); + assert(TidSummary); + if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary, + S.first.ByteOffset)) { + WholeProgramDevirtResolution *Res = + &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID) + .WPDRes[S.first.ByteOffset]; + + if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res, + DevirtTargets)) + continue; + } + } + + // Optionally have the thin link print message for each devirtualized + // function. 
+ if (PrintSummaryDevirt) + for (const auto &DT : DevirtTargets) + errs() << "Devirtualized call to " << DT << "\n"; + + return; +} Index: llvm/trunk/test/ThinLTO/X86/Inputs/devirt2.ll =================================================================== --- llvm/trunk/test/ThinLTO/X86/Inputs/devirt2.ll +++ llvm/trunk/test/ThinLTO/X86/Inputs/devirt2.ll @@ -0,0 +1,59 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } +%struct.C = type { %struct.A } +%struct.D = type { i32 (...)** } +%struct.E = type { i32 (...)** } + +@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1 +@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2 +@_ZTV1D = linkonce_odr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3 +@_ZTV1E = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.E*, i32)* @_ZN1E1mEi to i8*)] }, !type !4 + +define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 { + ret i32 0; +} + +define internal i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 { + ret i32 0; +} + +define linkonce_odr i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 { + ret i32 0; +} + +define internal i32 @_ZN1E1mEi(%struct.E* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @test2(%struct.E* %obj, i32 %a) { +entry: + %0 = bitcast %struct.E* %obj to i8*** + %vtable2 = load i8**, i8*** %0 + %1 = bitcast i8** %vtable2 to i8* + %p2 = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1E") + call void @llvm.assume(i1 %p2) + + %2 = bitcast 
i8** %vtable2 to i32 (%struct.E*, i32)** + %fptr33 = load i32 (%struct.E*, i32)*, i32 (%struct.E*, i32)** %2, align 8 + + %call4 = tail call i32 %fptr33(%struct.E* nonnull %obj, i32 %a) + ret i32 %call4 +} + +attributes #0 = { noinline optnone } + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTS1B"} +!2 = !{i64 16, !"_ZTS1C"} +!3 = !{i64 16, !"_ZTS1D"} +!4 = !{i64 16, !"_ZTS1E"} Index: llvm/trunk/test/ThinLTO/X86/devirt.ll =================================================================== --- llvm/trunk/test/ThinLTO/X86/devirt.ll +++ llvm/trunk/test/ThinLTO/X86/devirt.ll @@ -20,10 +20,10 @@ ; and that we generate summary information needed for index-based WPD. ; RUN: llvm-dis -o - %t2.o | FileCheck %s --check-prefix=NOENABLESPLITFLAG ; NOENABLESPLITFLAG-DAG: !{i32 1, !"EnableSplitLTOUnit", i32 0} -; NOENABLESPLITFLAG-DAG: [[An:\^[0-9]+]] = gv: (name: "_ZN1A1nEi") -; NOENABLESPLITFLAG-DAG: [[Bf:\^[0-9]+]] = gv: (name: "_ZN1B1fEi") -; NOENABLESPLITFLAG-DAG: [[Cf:\^[0-9]+]] = gv: (name: "_ZN1C1fEi") -; NOENABLESPLITFLAG-DAG: [[Dm:\^[0-9]+]] = gv: (name: "_ZN1D1mEi") +; NOENABLESPLITFLAG-DAG: [[An:\^[0-9]+]] = gv: (name: "_ZN1A1nEi" +; NOENABLESPLITFLAG-DAG: [[Bf:\^[0-9]+]] = gv: (name: "_ZN1B1fEi" +; NOENABLESPLITFLAG-DAG: [[Cf:\^[0-9]+]] = gv: (name: "_ZN1C1fEi" +; NOENABLESPLITFLAG-DAG: [[Dm:\^[0-9]+]] = gv: (name: "_ZN1D1mEi" ; NOENABLESPLITFLAG-DAG: [[B:\^[0-9]+]] = gv: (name: "_ZTV1B", {{.*}} vTableFuncs: ((virtFunc: [[Bf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[Bf]], [[An]]) ; NOENABLESPLITFLAG-DAG: [[C:\^[0-9]+]] = gv: (name: "_ZTV1C", {{.*}} vTableFuncs: ((virtFunc: [[Cf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[An]], [[Cf]]) ; NOENABLESPLITFLAG-DAG: [[D:\^[0-9]+]] = gv: (name: "_ZTV1D", {{.*}} vTableFuncs: ((virtFunc: [[Dm]], offset: 16)), refs: ([[Dm]]) @@ -33,7 +33,31 @@ ; Type Id on _ZTV1D should have been promoted ; NOENABLESPLITFLAG-DAG: 
typeidCompatibleVTable: (name: "1${{.*}}", summary: ((offset: 16, [[D]]))) -; TODO: Test index-based WPD one %t2.o once implemented. +; Legacy PM, Index based WPD +; RUN: llvm-lto2 run %t2.o -save-temps -pass-remarks=. \ +; RUN: -o %t3 \ +; RUN: -r=%t2.o,test,px \ +; RUN: -r=%t2.o,_ZN1A1nEi,p \ +; RUN: -r=%t2.o,_ZN1B1fEi,p \ +; RUN: -r=%t2.o,_ZN1C1fEi,p \ +; RUN: -r=%t2.o,_ZN1D1mEi,p \ +; RUN: -r=%t2.o,_ZTV1B,px \ +; RUN: -r=%t2.o,_ZTV1C,px \ +; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; New PM, Index based WPD +; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -o %t3 \ +; RUN: -r=%t2.o,test,px \ +; RUN: -r=%t2.o,_ZN1A1nEi,p \ +; RUN: -r=%t2.o,_ZN1B1fEi,p \ +; RUN: -r=%t2.o,_ZN1C1fEi,p \ +; RUN: -r=%t2.o,_ZN1D1mEi,p \ +; RUN: -r=%t2.o,_ZTV1B,px \ +; RUN: -r=%t2.o,_ZTV1C,px \ +; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR ; Legacy PM ; FIXME: Fix machine verifier issues and remove -verify-machineinstrs=0. PR39436. @@ -138,10 +162,24 @@ declare i1 @llvm.type.test(i8*, metadata) declare void @llvm.assume(i1) -declare i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) -declare i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) -declare i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) -declare i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) +define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 { + ret i32 0; +} + +; Make sure we don't inline or otherwise optimize out the direct calls. 
+attributes #0 = { noinline optnone } !0 = !{i64 16, !"_ZTS1A"} !1 = !{i64 16, !"_ZTS1B"} Index: llvm/trunk/test/ThinLTO/X86/devirt2.ll =================================================================== --- llvm/trunk/test/ThinLTO/X86/devirt2.ll +++ llvm/trunk/test/ThinLTO/X86/devirt2.ll @@ -0,0 +1,278 @@ +; REQUIRES: x86-registered-target + +; Test devirtualization requiring promotion of local targets. + +; Generate split module with summary for hybrid Thin/Regular LTO WPD. +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t1.o %s +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t2.o %p/Inputs/devirt2.ll + +; Check that we have module flag showing splitting enabled, and that we don't +; generate summary information needed for index-based WPD. +; RUN: llvm-modextract -b -n=0 %t2.o -o %t2.o.0 +; RUN: llvm-dis -o - %t2.o.0 | FileCheck %s --check-prefix=ENABLESPLITFLAG --implicit-check-not=vTableFuncs --implicit-check-not=typeidCompatibleVTable +; RUN: llvm-modextract -b -n=1 %t2.o -o %t2.o.1 +; RUN: llvm-dis -o - %t2.o.1 | FileCheck %s --check-prefix=ENABLESPLITFLAG --implicit-check-not=vTableFuncs --implicit-check-not=typeidCompatibleVTable +; ENABLESPLITFLAG: !{i32 1, !"EnableSplitLTOUnit", i32 1} + +; Generate unsplit module with summary for ThinLTO index-based WPD. +; RUN: opt -thinlto-bc -o %t3.o %s +; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt2.ll + +; Check that we don't have module flag when splitting not enabled for ThinLTO, +; and that we generate summary information needed for index-based WPD. 
+; RUN: llvm-dis -o - %t4.o | FileCheck %s --check-prefix=NOENABLESPLITFLAG +; NOENABLESPLITFLAG-DAG: !{i32 1, !"EnableSplitLTOUnit", i32 0} +; NOENABLESPLITFLAG-DAG: [[An:\^[0-9]+]] = gv: (name: "_ZN1A1nEi" +; NOENABLESPLITFLAG-DAG: [[Bf:\^[0-9]+]] = gv: (name: "_ZN1B1fEi" +; NOENABLESPLITFLAG-DAG: [[Cf:\^[0-9]+]] = gv: (name: "_ZN1C1fEi" +; NOENABLESPLITFLAG-DAG: [[Dm:\^[0-9]+]] = gv: (name: "_ZN1D1mEi" +; NOENABLESPLITFLAG-DAG: [[B:\^[0-9]+]] = gv: (name: "_ZTV1B", {{.*}} vTableFuncs: ((virtFunc: [[Bf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[Bf]], [[An]]) +; NOENABLESPLITFLAG-DAG: [[C:\^[0-9]+]] = gv: (name: "_ZTV1C", {{.*}} vTableFuncs: ((virtFunc: [[Cf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[An]], [[Cf]]) +; NOENABLESPLITFLAG-DAG: [[D:\^[0-9]+]] = gv: (name: "_ZTV1D", {{.*}} vTableFuncs: ((virtFunc: [[Dm]], offset: 16)), refs: ([[Dm]]) +; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1A", summary: ((offset: 16, [[B]]), (offset: 16, [[C]]))) +; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1B", summary: ((offset: 16, [[B]]))) +; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1C", summary: ((offset: 16, [[C]]))) +; NOENABLESPLITFLAG-DAG: typeidCompatibleVTable: (name: "_ZTS1D", summary: ((offset: 16, [[D]]))) + +; Legacy PM, Index based WPD +; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -pass-remarks=. 
\ +; RUN: -wholeprogramdevirt-print-index-based \ +; RUN: -o %t5 \ +; RUN: -r=%t3.o,test,px \ +; RUN: -r=%t3.o,_ZTV1B, \ +; RUN: -r=%t3.o,_ZTV1C, \ +; RUN: -r=%t3.o,_ZTV1D, \ +; RUN: -r=%t3.o,_ZN1D1mEi, \ +; RUN: -r=%t3.o,test2, \ +; RUN: -r=%t4.o,_ZN1B1fEi,p \ +; RUN: -r=%t4.o,_ZN1C1fEi,p \ +; RUN: -r=%t4.o,_ZN1D1mEi,p \ +; RUN: -r=%t4.o,test2,px \ +; RUN: -r=%t4.o,_ZTV1B,px \ +; RUN: -r=%t4.o,_ZTV1C,px \ +; RUN: -r=%t4.o,_ZTV1D,px \ +; RUN: -r=%t4.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=REMARK --check-prefix=PRINT +; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1 +; RUN: llvm-dis %t5.2.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2 +; RUN: nm %t5.1 | FileCheck %s --check-prefix=NM-INDEX1 +; RUN: nm %t5.2 | FileCheck %s --check-prefix=NM-INDEX2 + +; New PM, Index based WPD +; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -wholeprogramdevirt-print-index-based \ +; RUN: -o %t5 \ +; RUN: -r=%t3.o,test,px \ +; RUN: -r=%t3.o,_ZTV1B, \ +; RUN: -r=%t3.o,_ZTV1C, \ +; RUN: -r=%t3.o,_ZTV1D, \ +; RUN: -r=%t3.o,_ZN1D1mEi, \ +; RUN: -r=%t3.o,test2, \ +; RUN: -r=%t4.o,_ZN1B1fEi,p \ +; RUN: -r=%t4.o,_ZN1C1fEi,p \ +; RUN: -r=%t4.o,_ZN1D1mEi,p \ +; RUN: -r=%t4.o,test2,px \ +; RUN: -r=%t4.o,_ZTV1B,px \ +; RUN: -r=%t4.o,_ZTV1C,px \ +; RUN: -r=%t4.o,_ZTV1D,px \ +; RUN: -r=%t4.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=REMARK --check-prefix=PRINT +; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1 +; RUN: llvm-dis %t5.2.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2 +; RUN: nm %t5.1 | FileCheck %s --check-prefix=NM-INDEX1 +; RUN: nm %t5.2 | FileCheck %s --check-prefix=NM-INDEX2 + +; NM-INDEX1-DAG: U _ZN1A1nEi.llvm. +; NM-INDEX1-DAG: U _ZN1E1mEi.llvm. +; NM-INDEX1-DAG: U _ZN1D1mEi + +; NM-INDEX2-DAG: T _ZN1A1nEi.llvm. +; NM-INDEX2-DAG: T _ZN1E1mEi.llvm. 
+; NM-INDEX2-DAG: W _ZN1D1mEi +; NM-INDEX2-DAG: t _ZN1B1fEi +; NM-INDEX2-DAG: t _ZN1C1fEi + +; Index based WPD, distributed backends +; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm \ +; RUN: -thinlto-distributed-indexes -wholeprogramdevirt-print-index-based \ +; RUN: -o %t5 \ +; RUN: -r=%t3.o,test,px \ +; RUN: -r=%t3.o,_ZTV1B, \ +; RUN: -r=%t3.o,_ZTV1C, \ +; RUN: -r=%t3.o,_ZTV1D, \ +; RUN: -r=%t3.o,_ZN1D1mEi, \ +; RUN: -r=%t3.o,test2, \ +; RUN: -r=%t4.o,_ZN1B1fEi,p \ +; RUN: -r=%t4.o,_ZN1C1fEi,p \ +; RUN: -r=%t4.o,_ZN1D1mEi,p \ +; RUN: -r=%t4.o,test2,px \ +; RUN: -r=%t4.o,_ZTV1B,px \ +; RUN: -r=%t4.o,_ZTV1C,px \ +; RUN: -r=%t4.o,_ZTV1D,px \ +; RUN: -r=%t4.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=PRINT + +; PRINT-DAG: Devirtualized call to {{.*}} (_ZN1A1nEi) +; PRINT-DAG: Devirtualized call to {{.*}} (_ZN1E1mEi) +; PRINT-DAG: Devirtualized call to {{.*}} (_ZN1D1mEi) + +; Legacy PM +; RUN: llvm-lto2 run %t1.o %t2.o -save-temps -pass-remarks=. \ +; RUN: -o %t5 \ +; RUN: -r=%t1.o,test,px \ +; RUN: -r=%t1.o,_ZTV1B, \ +; RUN: -r=%t1.o,_ZTV1C, \ +; RUN: -r=%t1.o,_ZTV1D, \ +; RUN: -r=%t1.o,_ZTV1D, \ +; RUN: -r=%t1.o,_ZN1D1mEi, \ +; RUN: -r=%t1.o,_ZN1D1mEi, \ +; RUN: -r=%t1.o,test2, \ +; RUN: -r=%t2.o,_ZN1A1nEi,p \ +; RUN: -r=%t2.o,_ZN1B1fEi,p \ +; RUN: -r=%t2.o,_ZN1C1fEi,p \ +; RUN: -r=%t2.o,_ZN1D1mEi,p \ +; RUN: -r=%t2.o,_ZN1E1mEi,p \ +; RUN: -r=%t2.o,_ZTV1B, \ +; RUN: -r=%t2.o,_ZTV1C, \ +; RUN: -r=%t2.o,_ZTV1D, \ +; RUN: -r=%t2.o,_ZTV1E, \ +; RUN: -r=%t2.o,test2,px \ +; RUN: -r=%t2.o,_ZN1A1nEi, \ +; RUN: -r=%t2.o,_ZN1B1fEi, \ +; RUN: -r=%t2.o,_ZN1C1fEi, \ +; RUN: -r=%t2.o,_ZN1D1mEi, \ +; RUN: -r=%t2.o,_ZN1E1mEi, \ +; RUN: -r=%t2.o,_ZTV1B,px \ +; RUN: -r=%t2.o,_ZTV1C,px \ +; RUN: -r=%t2.o,_ZTV1D,px \ +; RUN: -r=%t2.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1 +; RUN: llvm-dis %t5.2.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2 +; RUN: nm %t5.1 | FileCheck %s 
--check-prefix=NM-HYBRID1 +; RUN: nm %t5.2 | FileCheck %s --check-prefix=NM-HYBRID2 + +; New PM +; RUN: llvm-lto2 run %t1.o %t2.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -o %t5 \ +; RUN: -r=%t1.o,test,px \ +; RUN: -r=%t1.o,_ZTV1B, \ +; RUN: -r=%t1.o,_ZTV1C, \ +; RUN: -r=%t1.o,_ZTV1D, \ +; RUN: -r=%t1.o,_ZTV1D, \ +; RUN: -r=%t1.o,_ZN1D1mEi, \ +; RUN: -r=%t1.o,_ZN1D1mEi, \ +; RUN: -r=%t1.o,test2, \ +; RUN: -r=%t2.o,_ZN1A1nEi,p \ +; RUN: -r=%t2.o,_ZN1B1fEi,p \ +; RUN: -r=%t2.o,_ZN1C1fEi,p \ +; RUN: -r=%t2.o,_ZN1D1mEi,p \ +; RUN: -r=%t2.o,_ZN1E1mEi,p \ +; RUN: -r=%t2.o,_ZTV1B, \ +; RUN: -r=%t2.o,_ZTV1C, \ +; RUN: -r=%t2.o,_ZTV1D, \ +; RUN: -r=%t2.o,_ZTV1E, \ +; RUN: -r=%t2.o,test2,px \ +; RUN: -r=%t2.o,_ZN1A1nEi, \ +; RUN: -r=%t2.o,_ZN1B1fEi, \ +; RUN: -r=%t2.o,_ZN1C1fEi, \ +; RUN: -r=%t2.o,_ZN1D1mEi, \ +; RUN: -r=%t2.o,_ZN1E1mEi, \ +; RUN: -r=%t2.o,_ZTV1B,px \ +; RUN: -r=%t2.o,_ZTV1C,px \ +; RUN: -r=%t2.o,_ZTV1D,px \ +; RUN: -r=%t2.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1 +; RUN: llvm-dis %t5.2.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2 +; RUN: nm %t5.1 | FileCheck %s --check-prefix=NM-HYBRID1 +; RUN: nm %t5.2 | FileCheck %s --check-prefix=NM-HYBRID2 + +; NM-HYBRID1-DAG: U _ZN1A1nEi$ +; NM-HYBRID1-DAG: U _ZN1E1mEi$ +; NM-HYBRID1-DAG: U _ZN1D1mEi + +; NM-HYBRID2-DAG: T _ZN1A1nEi$ +; NM-HYBRID2-DAG: T _ZN1E1mEi$ +; NM-HYBRID2-DAG: W _ZN1D1mEi +; NM-HYBRID2-DAG: T _ZN1B1fEi +; NM-HYBRID2-DAG: T _ZN1C1fEi + +; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi +; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi +; We should devirt call to _ZN1E1mEi once in importing module and once +; in original (exporting) module. 
+; REMARK-DAG: single-impl: devirtualized a call to _ZN1E1mEi +; REMARK-DAG: single-impl: devirtualized a call to _ZN1E1mEi + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } +%struct.C = type { %struct.A } +%struct.D = type { i32 (...)** } +%struct.E = type { i32 (...)** } + +@_ZTV1B = external constant [4 x i8*] +@_ZTV1C = external constant [4 x i8*] +;@_ZTV1D = external constant [3 x i8*] +@_ZTV1D = linkonce_odr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3 + +define linkonce_odr i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 { + ret i32 0 +} + +; CHECK-IR1-LABEL: define i32 @test +define i32 @test(%struct.A* %obj, %struct.D* %obj2, %struct.E* %obj3, i32 %a) { +entry: + %0 = bitcast %struct.A* %obj to i8*** + %vtable = load i8**, i8*** %0 + %1 = bitcast i8** %vtable to i8* + %p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A") + call void @llvm.assume(i1 %p) + %fptrptr = getelementptr i8*, i8** %vtable, i32 1 + %2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)** + %fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8 + + ; Check that the call was devirtualized. Ignore extra character before + ; symbol name which would happen if it was promoted during module + ; splitting for hybrid WPD. + ; CHECK-IR1: %call = tail call i32 bitcast (void ()* @{{.*}}_ZN1A1nEi + %call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a) + + %3 = bitcast i8** %vtable to i32 (%struct.A*, i32)** + %fptr22 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %3, align 8 + + ; We still have to call it as virtual. 
+ ; CHECK-IR1: %call3 = tail call i32 %fptr22 + %call3 = tail call i32 %fptr22(%struct.A* nonnull %obj, i32 %call) + + %4 = bitcast %struct.D* %obj2 to i8*** + %vtable2 = load i8**, i8*** %4 + %5 = bitcast i8** %vtable2 to i8* + %p2 = call i1 @llvm.type.test(i8* %5, metadata !"_ZTS1D") + call void @llvm.assume(i1 %p2) + + %6 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)** + %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %6, align 8 + + ; Check that the call was devirtualized. + ; CHECK-IR1: %call4 = tail call i32 @_ZN1D1mEi + %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %call3) + + %call5 = tail call i32 @test2(%struct.E* nonnull %obj3, i32 %call4) + ret i32 %call5 +} +; CHECK-IR1-LABEL: ret i32 +; CHECK-IR1-LABEL: } + +; CHECK-IR2: define i32 @test2 +; CHECK-IR2-NEXT: entry: +; Check that the call was devirtualized. Ignore extra character before +; symbol name which would happen if it was promoted during module +; splitting for hybrid WPD. +; CHECK-IR2-NEXT: %call4 = tail call i32 @{{.*}}_ZN1E1mEi + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) +declare i32 @test2(%struct.E* %obj, i32 %a) + +attributes #0 = { noinline optnone } + +!3 = !{i64 16, !"_ZTS1D"} Index: llvm/trunk/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll =================================================================== --- llvm/trunk/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll +++ llvm/trunk/test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll @@ -0,0 +1,66 @@ +; REQUIRES: x86-registered-target + +; Test that index-only devirtualization handles and ignores any +; type metadata that could not be summarized (because it was internal +; and could not be promoted due to the fact that the module has +; no external symbols and therefore could not be assigned a unique +; identifier). In this case we should simply not get the type +; metadata summary entries, and no promotion will occur. 
+ +; Generate unsplit module with summary for ThinLTO index-based WPD. +; RUN: opt -thinlto-bc -thinlto-split-lto-unit=false -o %t2.o %s + +; Check that, when splitting is not enabled for ThinLTO, we do not generate +; the type id summary information, as the type metadata was not promotable. +; RUN: llvm-dis -o - %t2.o | FileCheck %s --check-prefix=DIS +; DIS-NOT: typeIdInfo +; DIS-NOT: typeidMetadata + +; Legacy PM, Index based WPD +; RUN: llvm-lto2 run %t2.o -save-temps -pass-remarks=. \ +; RUN: -o %t3 \ +; RUN: -r=%t2.o,test,plx \ +; RUN: -r=%t2.o,_ZN1D1mEi, +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; New PM, Index based WPD +; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -o %t3 \ +; RUN: -r=%t2.o,test,plx \ +; RUN: -r=%t2.o,_ZN1D1mEi, +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.D = type { i32 (...)** } + +@_ZTV1D = internal constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3 + +; CHECK-IR-LABEL: define weak_odr dso_local i32 @test +define weak_odr i32 @test(%struct.D* %obj2, i32 %a) { +entry: + %0 = bitcast %struct.D* %obj2 to i8*** + %vtable2 = load i8**, i8*** %0 + %1 = bitcast i8** %vtable2 to i8* + %p2 = call i1 @llvm.type.test(i8* %1, metadata !4) + call void @llvm.assume(i1 %p2) + + %2 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)** + %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %2, align 8 + + ; Check that the call was not devirtualized. 
+ ; CHECK-IR: %call4 = tail call i32 %fptr33 + %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 0) + ret i32 %call4 +} +; CHECK-IR-LABEL: ret i32 +; CHECK-IR-LABEL: } + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) + +declare i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) + +!3 = !{i64 16, !4} +!4 = distinct !{} Index: llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp =================================================================== --- llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp +++ llvm/trunk/tools/llvm-lto2/llvm-lto2.cpp @@ -291,6 +291,14 @@ std::vector Res; for (const InputFile::Symbol &Sym : Input->symbols()) { auto I = CommandLineResolutions.find({F, Sym.getName()}); + // If it isn't found, look for "$", which would have been added + // (followed by a hash) when the symbol was promoted during module + // splitting if it was defined in one part and used in the other. + // Try looking up the symbol name before the "$". + if (I == CommandLineResolutions.end()) { + auto SplitName = Sym.getName().rsplit("$"); + I = CommandLineResolutions.find({F, SplitName.first}); + } if (I == CommandLineResolutions.end()) { llvm::errs() << argv[0] << ": missing symbol resolution for " << F << ',' << Sym.getName() << '\n';