Index: include/llvm/Bitcode/LLVMBitCodes.h
===================================================================
--- include/llvm/Bitcode/LLVMBitCodes.h
+++ include/llvm/Bitcode/LLVMBitCodes.h
@@ -263,10 +263,25 @@
   // Index-wide flags
   FS_FLAGS = 20,
   // Maps type identifier to summary information for that type identifier.
+  // Produced by the thin link (only lives in combined index).
   // TYPE_ID: [typeid, kind, bitwidth, align, size, bitmask, inlinebits,
   //           n x (typeid, kind, name, numrba,
   //                numrba x (numarg, numarg x arg, kind, info, byte, bit))]
   FS_TYPE_ID = 21,
+  // Maps type identifier to summary information for that type identifier
+  // computed from type metadata: the valueid of each vtable definition
+  // decorated with a type metadata for that identifier, and the offset from
+  // the corresponding type metadata.
+  // Exists in the per-module summary to provide information to thin link
+  // for index-based whole program devirtualization.
+  // TYPE_ID_METADATA: [typeid, n x (valueid, offset)]
+  FS_TYPE_ID_METADATA = 22,
+  // Summarizes vtable definition for use in index-based whole program
+  // devirtualization during the thin link.
+  // PERMODULE_VTABLE_GLOBALVAR_INIT_REFS: [valueid, flags, varflags,
+  //                                        numrefs, numrefs x valueid,
+  //                                        n x (valueid, offset)]
+  FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS = 23,
 };
 
 enum MetadataCodes {
Index: include/llvm/IR/ModuleSummaryIndex.h
===================================================================
--- include/llvm/IR/ModuleSummaryIndex.h
+++ include/llvm/IR/ModuleSummaryIndex.h
@@ -574,6 +574,8 @@
   /// Return the list of <CalleeValueInfo, CalleeInfo> pairs.
   ArrayRef<EdgeTy> calls() const { return CallGraphEdgeList; }
 
+  void addCall(EdgeTy E) { CallGraphEdgeList.push_back(E); }
+
   /// Returns the list of type identifiers used by this function in
   /// llvm.type.test intrinsics other than by an llvm.assume intrinsic,
   /// represented as GUIDs.
@@ -665,6 +667,12 @@
   }
 };
 
+/// Pair of function ValueInfo and offset within a vtable definition
+/// initializer array.
+using VirtFuncOffsetPair = std::pair<ValueInfo, uint64_t>;
+/// List of functions referenced by a particular vtable definition.
+using VTableFuncList = std::vector<VirtFuncOffsetPair>;
+
 /// Global variable summary information to aid decisions and
 /// implementation of importing.
 ///
@@ -672,6 +680,11 @@
 /// modified during the program run or not. This affects ThinLTO
 /// internalization
 class GlobalVarSummary : public GlobalValueSummary {
+private:
+  /// For vtable definitions this holds the list of functions and
+  /// their corresponding offsets within the initializer array.
+  std::unique_ptr<VTableFuncList> VTableFuncs;
+
 public:
   struct GVarFlags {
     GVarFlags(bool ReadOnly = false) : ReadOnly(ReadOnly) {}
@@ -692,6 +705,17 @@
   GVarFlags varflags() const { return VarFlags; }
   void setReadOnly(bool RO) { VarFlags.ReadOnly = RO; }
   bool isReadOnly() const { return VarFlags.ReadOnly; }
+
+  void setVTableFuncs(VTableFuncList Funcs) {
+    assert(!VTableFuncs);
+    VTableFuncs = llvm::make_unique<VTableFuncList>(std::move(Funcs));
+  }
+
+  ArrayRef<VirtFuncOffsetPair> vTableFuncs() const {
+    if (VTableFuncs)
+      return *VTableFuncs;
+    return {};
+  }
 };
 
 struct TypeTestResolution {
@@ -790,6 +814,14 @@
 using TypeIdSummaryMapTy =
     std::multimap<GlobalValue::GUID, std::pair<std::string, TypeIdSummary>>;
 
+/// Holds information about vtable definitions decorated with type metadata:
+/// the vtable definition value and its offset in the corresponding type
+/// metadata.
+using TypeIdOffsetGVPair = std::pair<uint64_t, ValueInfo>;
+/// List of vtable definitions decorated by the same type id metadata,
+/// and their corresponding offsets in the type id metadata.
+using TypeIdGVInfo = std::vector<TypeIdOffsetGVPair>;
+
 /// Class to hold module path string table and global value map,
 /// and encapsulate methods for operating on them.
 class ModuleSummaryIndex {
@@ -802,9 +834,14 @@
   ModulePathStringTableTy ModulePathStringTable;
 
   /// Mapping from type identifier GUIDs to type identifier and its summary
-  /// information.
+  /// information. Produced by thin link.
   TypeIdSummaryMapTy TypeIdMap;
 
+  /// Mapping from type identifier to information about vtables decorated
+  /// with that type identifier's metadata. Produced by per module summary
+  /// analysis and consumed by thin link.
+  std::map<std::string, TypeIdGVInfo> TypeIdMetadataMap;
+
   /// Mapping from original ID to GUID. If original ID can map to multiple
   /// GUIDs, it will be mapped to 0.
   std::map<GlobalValue::GUID, GlobalValue::GUID> OidGuidMap;
@@ -1162,6 +1199,33 @@
     return nullptr;
   }
 
+  TypeIdSummary *getTypeIdSummary(StringRef TypeId) {
+    return const_cast<TypeIdSummary *>(
+        static_cast<const ModuleSummaryIndex *>(this)->getTypeIdSummary(
+            TypeId));
+  }
+
+  const std::map<std::string, TypeIdGVInfo> &typeIdMetadataMap() const {
+    return TypeIdMetadataMap;
+  }
+
+  /// Return an existing or new TypeIdMetadataMap entry for \p TypeId.
+  /// This accessor can mutate the map and therefore should not be used in
+  /// the ThinLTO backends.
+  TypeIdGVInfo &getOrInsertTypeIdMetadataSummary(StringRef TypeId) {
+    return TypeIdMetadataMap[TypeId];
+  }
+
+  /// For the given \p TypeId, this returns either a pointer to the
+  /// TypeIdMetadataMap entry (if present in the summary map) or null
+  /// (if not present). This may be used when importing.
+  const TypeIdGVInfo *getTypeIdMetadataSummary(StringRef TypeId) const {
+    auto I = TypeIdMetadataMap.find(TypeId);
+    if (I == TypeIdMetadataMap.end())
+      return nullptr;
+    return &I->second;
+  }
+
   /// Collect for the given module the list of functions it defines
   /// (GUID -> Summary).
   void collectDefinedFunctionsForModule(StringRef ModulePath,
Index: include/llvm/Transforms/IPO/WholeProgramDevirt.h
===================================================================
--- include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -16,8 +16,10 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/IPO/FunctionImport.h"
 #include <cassert>
 #include <cstdint>
+#include <set>
 #include <utility>
 #include <vector>
 
@@ -28,6 +30,7 @@
 class Function;
 class GlobalVariable;
 class ModuleSummaryIndex;
+struct ValueInfo;
 
 namespace wholeprogramdevirt {
 
@@ -228,6 +231,29 @@
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
 };
 
+struct VTableSlotSummary {
+  StringRef TypeID;
+  uint64_t ByteOffset;
+};
+
+/// Perform index-based whole program devirtualization on the \p Summary
+/// index. Any devirtualized targets used by a type test in another module
+/// are added to the \p ExportedGUIDs set. For any local devirtualized targets
+/// only used within the defining module, the information necessary for
+/// locating the corresponding WPD resolution is recorded for the ValueInfo
+/// in case it is exported by cross module importing (in which case the
+/// devirtualized target name will need adjustment).
+void runWholeProgramDevirtOnIndex(
+    ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
+    std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap);
+
+/// Call after cross-module importing to update the recorded single impl
+/// devirt target names for any locals that were exported.
+void updateIndexWPDForExports(
+    ModuleSummaryIndex &Summary,
+    StringMap<FunctionImporter::ExportSetTy> &ExportLists,
+    std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap);
+
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_IPO_WHOLEPROGRAMDEVIRT_H
Index: lib/Analysis/ModuleSummaryAnalysis.cpp
===================================================================
--- lib/Analysis/ModuleSummaryAnalysis.cpp
+++ lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -410,9 +410,98 @@
   Index.addGlobalValueSummary(F, std::move(FuncSummary));
 }
 
-static void
-computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V,
-                       DenseSet<GlobalValue::GUID> &CantBePromoted) {
+/// Find function pointers referenced within the given vtable initializer
+/// (or subset of an initializer) \p I. The starting offset of \p I within
+/// the vtable initializer is \p StartingOffset. Any discovered function
+/// pointers are added to \p VTableFuncs along with their cumulative offset
+/// within the initializer.
+static void findFuncPointers(const Constant *I, uint64_t StartingOffset,
+                             const Module &M, ModuleSummaryIndex &Index,
+                             VTableFuncList &VTableFuncs) {
+  // First check if this is a function pointer.
+  if (I->getType()->isPointerTy()) {
+    auto Fn = dyn_cast<Function>(I->stripPointerCasts());
+    // We can disregard __cxa_pure_virtual as a possible call target, as
+    // calls to pure virtuals are UB.
+    if (Fn && Fn->getName() != "__cxa_pure_virtual")
+      VTableFuncs.push_back(
+          std::make_pair(Index.getOrInsertValueInfo(Fn), StartingOffset));
+    return;
+  }
+
+  // Walk through the elements in the constant struct or array and recursively
+  // look for virtual function pointers.
+  const DataLayout &DL = M.getDataLayout();
+  if (auto *C = dyn_cast<ConstantStruct>(I)) {
+    StructType *STy = dyn_cast<StructType>(C->getType());
+    assert(STy);
+    const StructLayout *SL = DL.getStructLayout(C->getType());
+
+    for (StructType::element_iterator EB = STy->element_begin(), EI = EB,
+                                      EE = STy->element_end();
+         EI != EE; ++EI) {
+      auto Offset = SL->getElementOffset(EI - EB);
+      unsigned Op = SL->getElementContainingOffset(Offset);
+      findFuncPointers(cast<Constant>(I->getOperand(Op)),
+                       StartingOffset + Offset, M, Index, VTableFuncs);
+    }
+  } else if (auto *C = dyn_cast<ConstantArray>(I)) {
+    ArrayType *ATy = C->getType();
+    Type *EltTy = ATy->getElementType();
+    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
+      findFuncPointers(cast<Constant>(I->getOperand(i)),
+                       StartingOffset + i * EltSize, M, Index, VTableFuncs);
+    }
+  }
+}
+
+// Identify the function pointers referenced by vtable definition \p V.
+static void computeVTableFuncs(ModuleSummaryIndex &Index,
+                               const GlobalVariable &V, const Module &M,
+                               VTableFuncList &VTableFuncs) {
+  if (!V.isConstant())
+    return;
+
+  findFuncPointers(V.getInitializer(), /*StartingOffset=*/0, M, Index,
+                   VTableFuncs);
+
+#ifndef NDEBUG
+  // Validate that the VTableFuncs list is ordered by offset.
+  uint64_t PrevOffset = 0;
+  for (auto &P : VTableFuncs) {
+    // The findFuncPointers traversal should have encountered the
+    // functions in offset order. We need to use ">=" since PrevOffset
+    // starts at 0.
+    assert(P.second >= PrevOffset);
+    PrevOffset = P.second;
+  }
+#endif
+}
+
+/// Record vtable definition \p V for each type metadata it references.
+static void recordTypeIdMetadataReferences(ModuleSummaryIndex &Index,
+                                           const GlobalVariable &V,
+                                           SmallVectorImpl<MDNode *> &Types) {
+  for (MDNode *Type : Types) {
+    auto TypeID = Type->getOperand(1).get();
+
+    uint64_t Offset =
+        cast<ConstantInt>(
+            cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+            ->getZExtValue();
+
+    if (auto *TypeId = dyn_cast<MDString>(TypeID))
+      Index.getOrInsertTypeIdMetadataSummary(TypeId->getString())
+          .push_back({Offset, Index.getOrInsertValueInfo(&V)});
+  }
+}
+
+static void computeVariableSummary(ModuleSummaryIndex &Index,
+                                   const GlobalVariable &V,
+                                   DenseSet<GlobalValue::GUID> &CantBePromoted,
+                                   const Module &M,
+                                   SmallVectorImpl<MDNode *> &Types) {
   SetVector<ValueInfo> RefEdges;
   SmallPtrSet<const User *, 8> Visited;
   bool HasBlockAddress = findRefEdges(Index, &V, RefEdges, Visited);
@@ -420,6 +509,21 @@
   GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal,
                                     /* Live = */ false, V.isDSOLocal());
 
+  VTableFuncList VTableFuncs;
+  // If splitting is not enabled, then we compute the summary information
+  // necessary for index-based whole program devirtualization.
+  if (!Index.enableSplitLTOUnit()) {
+    Types.clear();
+    V.getMetadata(LLVMContext::MD_type, Types);
+    if (!Types.empty()) {
+      // Identify the function pointers referenced by this vtable definition.
+      computeVTableFuncs(Index, V, M, VTableFuncs);
+
+      // Record this vtable definition for each type metadata it references.
+      recordTypeIdMetadataReferences(Index, V, Types);
+    }
+  }
+
   // Don't mark variables we won't be able to internalize as read-only.
   GlobalVarSummary::GVarFlags VarFlags(
       !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() &&
@@ -430,6 +534,8 @@
     CantBePromoted.insert(V.getGUID());
   if (HasBlockAddress)
     GVarSummary->setNotEligibleToImport();
+  if (!VTableFuncs.empty())
+    GVarSummary->setVTableFuncs(VTableFuncs);
   Index.addGlobalValueSummary(V, std::move(GVarSummary));
 }
 
@@ -572,10 +678,11 @@
   // Compute summaries for all variables defined in module, and save in the
   // index.
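// --- Editorial illustration (not part of the patch) -------------------------
// A minimal sketch of what the analysis above records for a simple case,
// assuming a constant vtable global `VT` (e.g. _ZTV1B) whose initializer holds
// function `BF` at byte offset 16 and `AN` at byte offset 24, and which
// carries the type metadata !{i64 16, !"_ZTS1A"}. `Index` is the
// ModuleSummaryIndex being built and `GVarSummary` is the vtable's
// GlobalVarSummary; all names and offsets here are hypothetical.
//
//   VTableFuncList Funcs;
//   Funcs.push_back({Index.getOrInsertValueInfo(BF), 16});
//   Funcs.push_back({Index.getOrInsertValueInfo(AN), 24});
//   GVarSummary->setVTableFuncs(std::move(Funcs));
//
//   // One (offset, vtable) entry per !type annotation on VT:
//   Index.getOrInsertTypeIdMetadataSummary("_ZTS1A")
//       .push_back({16, Index.getOrInsertValueInfo(&VT)});
//
// This is the information the thin link later consumes through
// typeIdMetadataMap() and GlobalVarSummary::vTableFuncs(). The per-module
// summary loop that drives this analysis continues below.
// -----------------------------------------------------------------------------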
+ SmallVector Types; for (const GlobalVariable &G : M.globals()) { if (G.isDeclaration()) continue; - computeVariableSummary(Index, G, CantBePromoted); + computeVariableSummary(Index, G, CantBePromoted, M, Types); } // Compute summaries for all aliases defined in module, and save in the Index: lib/AsmParser/LLLexer.cpp =================================================================== --- lib/AsmParser/LLLexer.cpp +++ lib/AsmParser/LLLexer.cpp @@ -748,6 +748,8 @@ KEYWORD(critical); KEYWORD(relbf); KEYWORD(variable); + KEYWORD(vTableFuncs); + KEYWORD(virtFunc); KEYWORD(aliasee); KEYWORD(refs); KEYWORD(typeIdInfo); @@ -760,6 +762,7 @@ KEYWORD(offset); KEYWORD(args); KEYWORD(typeid); + KEYWORD(typeidMetadata); KEYWORD(summary); KEYWORD(typeTestRes); KEYWORD(kind); Index: lib/AsmParser/LLParser.h =================================================================== --- lib/AsmParser/LLParser.h +++ lib/AsmParser/LLParser.h @@ -368,9 +368,11 @@ IdToIndexMapType &IdToIndexMap, unsigned Index); bool ParseVFuncId(FunctionSummary::VFuncId &VFuncId, IdToIndexMapType &IdToIndexMap, unsigned Index); + bool ParseOptionalVTableFuncs(VTableFuncList &VTableFuncs); bool ParseOptionalRefs(std::vector &Refs); bool ParseTypeIdEntry(unsigned ID); bool ParseTypeIdSummary(TypeIdSummary &TIS); + bool ParseTypeIdMetadataEntry(unsigned ID); bool ParseTypeTestResolution(TypeTestResolution &TTRes); bool ParseOptionalWpdResolutions( std::map &WPDResMap); Index: lib/AsmParser/LLParser.cpp =================================================================== --- lib/AsmParser/LLParser.cpp +++ lib/AsmParser/LLParser.cpp @@ -821,6 +821,9 @@ case lltok::kw_typeid: return ParseTypeIdEntry(SummaryID); break; + case lltok::kw_typeidMetadata: + return ParseTypeIdMetadataEntry(SummaryID); + break; default: return Error(Lex.getLoc(), "unexpected summary kind"); } @@ -7272,6 +7275,90 @@ return false; } +static ValueInfo EmptyVI = + ValueInfo(false, (GlobalValueSummaryMapTy::value_type *)-8); + +/// TypeIdMetadataEntry +/// ::= 'typeidMetadata' ':' '(' 'name' ':' STRINGCONSTANT ',' TypeIdGVInfo +/// ')' +bool LLParser::ParseTypeIdMetadataEntry(unsigned ID) { + assert(Lex.getKind() == lltok::kw_typeidMetadata); + Lex.Lex(); + + std::string Name; + if (ParseToken(lltok::colon, "expected ':' here") || + ParseToken(lltok::lparen, "expected '(' here") || + ParseToken(lltok::kw_name, "expected 'name' here") || + ParseToken(lltok::colon, "expected ':' here") || + ParseStringConstant(Name)) + return true; + + TypeIdGVInfo &TI = Index->getOrInsertTypeIdMetadataSummary(Name); + if (ParseToken(lltok::comma, "expected ',' here") || + ParseToken(lltok::kw_summary, "expected 'summary' here") || + ParseToken(lltok::colon, "expected ':' here") || + ParseToken(lltok::lparen, "expected '(' here")) + return true; + + IdToIndexMapType IdToIndexMap; + // Parse each call edge + do { + uint64_t Offset; + if (ParseToken(lltok::lparen, "expected '(' here") || + ParseToken(lltok::kw_offset, "expected 'offset' here") || + ParseToken(lltok::colon, "expected ':' here") || ParseUInt64(Offset) || + ParseToken(lltok::comma, "expected ',' here")) + return true; + + LocTy Loc = Lex.getLoc(); + unsigned GVId; + ValueInfo VI; + if (ParseGVReference(VI, GVId)) + return true; + + // Keep track of the TypeIdGVInfo array index needing a forward reference. + // We will save the location of the ValueInfo needing an update, but + // can only do so once the std::vector is finalized. 
+    if (VI == EmptyVI)
+      IdToIndexMap[GVId].push_back(std::make_pair(TI.size(), Loc));
+    TI.push_back({Offset, VI});
+
+    if (ParseToken(lltok::rparen, "expected ')' in call"))
+      return true;
+  } while (EatIfPresent(lltok::comma));
+
+  // Now that the TI vector is finalized, it is safe to save the locations
+  // of any forward GV references that need updating later.
+  for (auto I : IdToIndexMap) {
+    for (auto P : I.second) {
+      assert(TI[P.first].second == EmptyVI &&
+             "Forward referenced ValueInfo expected to be empty");
+      auto FwdRef = ForwardRefValueInfos.insert(std::make_pair(
+          I.first, std::vector<std::pair<ValueInfo *, LocTy>>()));
+      FwdRef.first->second.push_back(
+          std::make_pair(&TI[P.first].second, P.second));
+    }
+  }
+
+  if (ParseToken(lltok::rparen, "expected ')' here") ||
+      ParseToken(lltok::rparen, "expected ')' here"))
+    return true;
+
+  // Check if this ID was forward referenced, and if so, update the
+  // corresponding GUIDs.
+  auto FwdRefTIDs = ForwardRefTypeIds.find(ID);
+  if (FwdRefTIDs != ForwardRefTypeIds.end()) {
+    for (auto TIDRef : FwdRefTIDs->second) {
+      assert(!*TIDRef.first &&
+             "Forward referenced type id GUID expected to be 0");
+      *TIDRef.first = GlobalValue::getGUID(Name);
+    }
+    ForwardRefTypeIds.erase(FwdRefTIDs);
+  }
+
+  return false;
+}
+
 /// TypeTestResolution
 ///   ::= 'typeTestRes' ':' '(' 'kind' ':'
 ///         ( 'unsat' | 'byteArray' | 'inline' | 'single' | 'allOnes' ) ','
@@ -7779,6 +7866,7 @@
                     /*Live=*/false, /*IsLocal=*/false);
   GlobalVarSummary::GVarFlags GVarFlags(/*ReadOnly*/ false);
   std::vector<ValueInfo> Refs;
+  VTableFuncList VTableFuncs;
   if (ParseToken(lltok::colon, "expected ':' here") ||
       ParseToken(lltok::lparen, "expected '(' here") ||
       ParseModuleReference(ModulePath) ||
@@ -7787,10 +7875,20 @@
       ParseGVarFlags(GVarFlags))
     return true;
 
-  // Parse optional refs field
-  if (EatIfPresent(lltok::comma)) {
-    if (ParseOptionalRefs(Refs))
-      return true;
+  // Parse optional fields
+  while (EatIfPresent(lltok::comma)) {
+    switch (Lex.getKind()) {
+    case lltok::kw_vTableFuncs:
+      if (ParseOptionalVTableFuncs(VTableFuncs))
+        return true;
+      break;
+    case lltok::kw_refs:
+      if (ParseOptionalRefs(Refs))
+        return true;
+      break;
+    default:
+      return Error(Lex.getLoc(), "expected optional variable summary field");
+    }
   }
 
   if (ParseToken(lltok::rparen, "expected ')' here"))
@@ -7800,6 +7898,7 @@
       llvm::make_unique<GlobalVarSummary>(GVFlags, GVarFlags, std::move(Refs));
   GS->setModulePath(ModulePath);
+  GS->setVTableFuncs(std::move(VTableFuncs));
 
   AddGlobalValueToIndex(Name, GUID, (GlobalValue::LinkageTypes)GVFlags.Linkage,
                         ID, std::move(GS));
 
@@ -8017,6 +8116,67 @@
   return false;
 }
 
+/// OptionalVTableFuncs
+///   := 'vTableFuncs' ':' '(' VTableFunc [',' VTableFunc]* ')'
+///   VTableFunc ::= '(' 'virtFunc' ':' GVReference ',' 'offset' ':' UInt64 ')'
+bool LLParser::ParseOptionalVTableFuncs(VTableFuncList &VTableFuncs) {
+  assert(Lex.getKind() == lltok::kw_vTableFuncs);
+  Lex.Lex();
+
+  if (ParseToken(lltok::colon, "expected ':' in vTableFuncs") ||
+      ParseToken(lltok::lparen, "expected '(' in vTableFuncs"))
+    return true;
+
+  IdToIndexMapType IdToIndexMap;
+  // Parse each virtual function pair
+  do {
+    ValueInfo VI;
+    if (ParseToken(lltok::lparen, "expected '(' in vTableFunc") ||
+        ParseToken(lltok::kw_virtFunc, "expected 'virtFunc' in vTableFunc") ||
+        ParseToken(lltok::colon, "expected ':'"))
+      return true;
+
+    LocTy Loc = Lex.getLoc();
+    unsigned GVId;
+    if (ParseGVReference(VI, GVId))
+      return true;
+
+    uint64_t Offset;
+    if (ParseToken(lltok::comma, "expected comma") ||
+        ParseToken(lltok::kw_offset, "expected offset") ||
ParseToken(lltok::colon, "expected ':'") || ParseUInt64(Offset)) + return true; + + // Keep track of the VTableFuncs array index needing a forward reference. + // We will save the location of the ValueInfo needing an update, but + // can only do so once the std::vector is finalized. + if (VI == EmptyVI) + IdToIndexMap[GVId].push_back(std::make_pair(VTableFuncs.size(), Loc)); + VTableFuncs.push_back(std::make_pair(VI, Offset)); + + if (ParseToken(lltok::rparen, "expected ')' in vTableFunc")) + return true; + } while (EatIfPresent(lltok::comma)); + + // Now that the VTableFuncs vector is finalized, it is safe to save the + // locations of any forward GV references that need updating later. + for (auto I : IdToIndexMap) { + for (auto P : I.second) { + assert(VTableFuncs[P.first].first == EmptyVI && + "Forward referenced ValueInfo expected to be empty"); + auto FwdRef = ForwardRefValueInfos.insert(std::make_pair( + I.first, std::vector>())); + FwdRef.first->second.push_back( + std::make_pair(&VTableFuncs[P.first].first, P.second)); + } + } + + if (ParseToken(lltok::rparen, "expected ')' in vTableFuncs")) + return true; + + return false; +} + /// OptionalRefs /// := 'refs' ':' '(' GVReference [',' GVReference]* ')' bool LLParser::ParseOptionalRefs(std::vector &Refs) { Index: lib/AsmParser/LLToken.h =================================================================== --- lib/AsmParser/LLToken.h +++ lib/AsmParser/LLToken.h @@ -378,6 +378,8 @@ kw_critical, kw_relbf, kw_variable, + kw_vTableFuncs, + kw_virtFunc, kw_aliasee, kw_refs, kw_typeIdInfo, @@ -390,6 +392,7 @@ kw_offset, kw_args, kw_typeid, + kw_typeidMetadata, kw_summary, kw_typeTestRes, kw_kind, Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -747,6 +747,9 @@ bool HasRelBF); Error parseEntireSummary(unsigned ID); Error parseModuleStringTable(); + void parseTypeIdMetadataSummaryRecord(ArrayRef Record); + void parseTypeIdGVInfo(ArrayRef Record, size_t &Slot, + TypeIdGVInfo &TypeId); std::pair getValueInfoFromValueId(unsigned ValueId); @@ -5225,6 +5228,24 @@ parseWholeProgramDevirtResolution(Record, Strtab, Slot, TypeId); } +void ModuleSummaryIndexBitcodeReader::parseTypeIdGVInfo( + ArrayRef Record, size_t &Slot, TypeIdGVInfo &TypeId) { + uint64_t Offset = Record[Slot++]; + ValueInfo Callee = getValueInfoFromValueId(Record[Slot++]).first; + TypeId.push_back({Offset, Callee}); +} + +void ModuleSummaryIndexBitcodeReader::parseTypeIdMetadataSummaryRecord( + ArrayRef Record) { + size_t Slot = 0; + TypeIdGVInfo &TypeId = TheIndex.getOrInsertTypeIdMetadataSummary( + {Strtab.data() + Record[Slot], static_cast(Record[Slot + 1])}); + Slot += 2; + + while (Slot < Record.size()) + parseTypeIdGVInfo(Record, Slot, TypeId); +} + static void setImmutableRefs(std::vector &Refs, unsigned Count) { // Read-only refs are in the end of the refs list. 
for (unsigned RefNo = Refs.size() - Count; RefNo < Refs.size(); ++RefNo) @@ -5442,6 +5463,34 @@ TheIndex.addGlobalValueSummary(GUID.first, std::move(FS)); break; } + // FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS: [valueid, flags, varflags, + // numrefs, numrefs x valueid, + // n x (valueid, offset)] + case bitc::FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS: { + unsigned ValueID = Record[0]; + uint64_t RawFlags = Record[1]; + GlobalVarSummary::GVarFlags GVF = getDecodedGVarFlags(Record[2]); + unsigned NumRefs = Record[3]; + unsigned RefListStartIndex = 4; + unsigned VTableListStartIndex = RefListStartIndex + NumRefs; + auto Flags = getDecodedGVSummaryFlags(RawFlags, Version); + std::vector Refs = makeRefList( + ArrayRef(Record).slice(RefListStartIndex, NumRefs)); + VTableFuncList VTableFuncs; + for (unsigned I = VTableListStartIndex, E = Record.size(); I != E; ++I) { + ValueInfo Callee = getValueInfoFromValueId(Record[I]).first; + uint64_t Offset = Record[++I]; + VTableFuncs.push_back({Callee, Offset}); + } + auto VS = + llvm::make_unique(Flags, GVF, std::move(Refs)); + VS->setModulePath(getThisModule()->first()); + VS->setVTableFuncs(VTableFuncs); + auto GUID = getValueInfoFromValueId(ValueID); + VS->setOriginalName(GUID.second); + TheIndex.addGlobalValueSummary(GUID.first, std::move(VS)); + break; + } // FS_COMBINED: [valueid, modid, flags, instcount, fflags, numrefs, // numrefs x valueid, n x (valueid)] // FS_COMBINED_PROFILE: [valueid, modid, flags, instcount, fflags, numrefs, @@ -5611,6 +5660,10 @@ case bitc::FS_TYPE_ID: parseTypeIdSummaryRecord(Record, Strtab, TheIndex); break; + + case bitc::FS_TYPE_ID_METADATA: + parseTypeIdMetadataSummaryRecord(Record); + break; } } llvm_unreachable("Exit infinite loop"); Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -214,7 +214,8 @@ const Function &F); void writeModuleLevelReferences(const GlobalVariable &V, SmallVector &NameVals, - unsigned FSModRefsAbbrev); + unsigned FSModRefsAbbrev, + unsigned FSModVTableRefsAbbrev); void assignValueId(GlobalValue::GUID ValGUID) { GUIDToValueIdMap[ValGUID] = ++GlobalValueId; @@ -3529,6 +3530,18 @@ W.second); } +static void writeTypeIdMetadataSummaryRecord( + SmallVector &NameVals, StringTableBuilder &StrtabBuilder, + const std::string &Id, const TypeIdGVInfo &Summary, ValueEnumerator &VE) { + NameVals.push_back(StrtabBuilder.add(Id)); + NameVals.push_back(Id.size()); + + for (auto &P : Summary) { + NameVals.push_back(P.first); + NameVals.push_back(VE.getValueID(P.second.getValue())); + } +} + // Helper to emit a single function summary record. void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord( SmallVector &NameVals, GlobalValueSummary *Summary, @@ -3573,7 +3586,7 @@ // and emit them in a summary record. 
void ModuleBitcodeWriterBase::writeModuleLevelReferences( const GlobalVariable &V, SmallVector &NameVals, - unsigned FSModRefsAbbrev) { + unsigned FSModRefsAbbrev, unsigned FSModVTableRefsAbbrev) { auto VI = Index->getValueInfo(V.getGUID()); if (!VI || VI.getSummaryList().empty()) { // Only declarations should not have a summary (a declaration might however @@ -3587,6 +3600,10 @@ NameVals.push_back(getEncodedGVSummaryFlags(VS->flags())); NameVals.push_back(getEncodedGVarFlags(VS->varflags())); + auto VTableFuncs = VS->vTableFuncs(); + if (!VTableFuncs.empty()) + NameVals.push_back(VS->refs().size()); + unsigned SizeBeforeRefs = NameVals.size(); for (auto &RI : VS->refs()) NameVals.push_back(VE.getValueID(RI.getValue())); @@ -3594,8 +3611,20 @@ // been initialized from a DenseSet. llvm::sort(NameVals.begin() + SizeBeforeRefs, NameVals.end()); - Stream.EmitRecord(bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS, NameVals, - FSModRefsAbbrev); + if (!VTableFuncs.empty()) { + // VTableFuncs pairs should already be sorted by offset. + for (auto &P : VTableFuncs) { + NameVals.push_back(VE.getValueID(P.first.getValue())); + NameVals.push_back(P.second); + } + } + + if (VTableFuncs.empty()) + Stream.EmitRecord(bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS, NameVals, + FSModRefsAbbrev); + else + Stream.EmitRecord(bitc::FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS, NameVals, + FSModVTableRefsAbbrev); NameVals.clear(); } @@ -3676,6 +3705,17 @@ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSModRefsAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + // Abbrev for FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS. + Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs + // numrefs x valueid, n x (valueid , offset) + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + unsigned FSModVTableRefsAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + // Abbrev for FS_ALIAS. Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_ALIAS)); @@ -3684,6 +3724,16 @@ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid unsigned FSAliasAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + // Abbrev for FS_TYPE_ID_METADATA + Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(bitc::FS_TYPE_ID_METADATA)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // typeid strtab index + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // typeid length + // n x (valueid , offset) + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + unsigned TypeIdMetadataAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + SmallVector NameVals; // Iterate over the list of functions instead of the Index to // ensure the ordering is stable. @@ -3708,7 +3758,8 @@ // Capture references from GlobalVariable initializers, which are outside // of a function scope. 
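// --- Editorial illustration (not part of the patch) -------------------------
// Worked example of the record emitted by writeModuleLevelReferences above
// when a vtable summary is present, following the layout documented in
// LLVMBitCodes.h for FS_PERMODULE_VTABLE_GLOBALVAR_INIT_REFS:
//   [valueid, flags, varflags, numrefs, numrefs x valueid, n x (valueid, offset)]
// With hypothetical value ids, a vtable with value id 5, two refs (value ids
// 7 and 9), and those same two functions at byte offsets 16 and 24 becomes:
//
//   [ 5, <flags>, <varflags>, 2,  7, 9,  7, 16,  9, 24 ]
//
// When the variable has no vtable function entries, the existing
// FS_PERMODULE_GLOBALVAR_INIT_REFS record is emitted instead; it carries no
// numrefs count and no trailing (valueid, offset) pairs.
// -----------------------------------------------------------------------------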
for (const GlobalVariable &G : M.globals()) - writeModuleLevelReferences(G, NameVals, FSModRefsAbbrev); + writeModuleLevelReferences(G, NameVals, FSModRefsAbbrev, + FSModVTableRefsAbbrev); for (const GlobalAlias &A : M.aliases()) { auto *Aliasee = A.getBaseObject(); @@ -3726,6 +3777,16 @@ NameVals.clear(); } + if (!Index->typeIdMetadataMap().empty()) { + for (auto &S : Index->typeIdMetadataMap()) { + writeTypeIdMetadataSummaryRecord(NameVals, StrtabBuilder, S.first, + S.second, VE); + Stream.EmitRecord(bitc::FS_TYPE_ID_METADATA, NameVals, + TypeIdMetadataAbbrev); + NameVals.clear(); + } + } + Stream.ExitBlock(); } Index: lib/IR/AsmWriter.cpp =================================================================== --- lib/IR/AsmWriter.cpp +++ lib/IR/AsmWriter.cpp @@ -1037,6 +1037,9 @@ TidIter != TheIndex->typeIds().end(); TidIter++) CreateTypeIdSlot(TidIter->second.first); + for (auto &TId : TheIndex->typeIdMetadataMap()) + CreateGUIDSlot(GlobalValue::getGUID(TId.first)); + ST_DEBUG("end processIndex!\n"); } @@ -2392,6 +2395,7 @@ void printGlobalVarSummary(const GlobalVarSummary *GS); void printFunctionSummary(const FunctionSummary *FS); void printTypeIdSummary(const TypeIdSummary &TIS); + void printTypeIdMetadataSummary(const TypeIdGVInfo &TI); void printTypeTestResolution(const TypeTestResolution &TTRes); void printArgs(const std::vector &Args); void printWPDRes(const WholeProgramDevirtResolution &WPDRes); @@ -2694,6 +2698,15 @@ printTypeIdSummary(TidIter->second.second); Out << ") ; guid = " << TidIter->first << "\n"; } + + // Print the TypeIdMetadataMap entries. + for (auto &TId : TheIndex->typeIdMetadataMap()) { + auto GUID = GlobalValue::getGUID(TId.first); + Out << "^" << Machine.getGUIDSlot(GUID) << " = typeidMetadata: (name: \"" + << TId.first << "\""; + printTypeIdMetadataSummary(TId.second); + Out << ") ; guid = " << GUID << "\n"; + } } static const char * @@ -2776,6 +2789,18 @@ Out << ")"; } +void AssemblyWriter::printTypeIdMetadataSummary(const TypeIdGVInfo &TI) { + Out << ", summary: ("; + FieldSeparator FS; + for (auto &P : TI) { + Out << FS; + Out << "(offset: " << P.first << ", "; + Out << "^" << Machine.getGUIDSlot(P.second.getGUID()); + Out << ")"; + } + Out << ")"; +} + void AssemblyWriter::printArgs(const std::vector &Args) { Out << "args: ("; FieldSeparator FS; @@ -2845,6 +2870,19 @@ void AssemblyWriter::printGlobalVarSummary(const GlobalVarSummary *GS) { Out << ", varFlags: (readonly: " << GS->VarFlags.ReadOnly << ")"; + + auto VTableFuncs = GS->vTableFuncs(); + if (!VTableFuncs.empty()) { + Out << ", vTableFuncs: ("; + FieldSeparator FS; + for (auto &P : VTableFuncs) { + Out << FS; + Out << "(virtFunc: ^" << Machine.getGUIDSlot(P.first.getGUID()) + << ", offset: " << P.second; + Out << ")"; + } + Out << ")"; + } } static std::string getLinkageName(GlobalValue::LinkageTypes LT) { Index: lib/LTO/LTO.cpp =================================================================== --- lib/LTO/LTO.cpp +++ lib/LTO/LTO.cpp @@ -43,6 +43,7 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" +#include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/Utils/SplitModule.h" #include @@ -1196,15 +1197,28 @@ if (DumpThinCGSCCs) ThinLTO.CombinedIndex.dumpSCCs(outs()); + std::set ExportedGUIDs; + + // Perform index-based WPD. This will return immediately if there are + // no index entries in the typeIdMetadata map (e.g. 
if we are instead + // performing IR-based WPD in hybrid regular/thin LTO mode). + std::map> LocalWPDTargetsMap; + runWholeProgramDevirtOnIndex(ThinLTO.CombinedIndex, ExportedGUIDs, + LocalWPDTargetsMap); + if (Conf.OptLevel > 0) ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, ImportLists, ExportLists); + // Update local devirtualized targets that were exported by cross-module + // importing + updateIndexWPDForExports(ThinLTO.CombinedIndex, ExportLists, + LocalWPDTargetsMap); + // Figure out which symbols need to be internalized. This also needs to happen // at -O0 because summary-based DCE is implemented using internalization, and // we must apply DCE consistently with the full LTO module in order to avoid // undefined references during the final link. - std::set ExportedGUIDs; for (auto &Res : GlobalResolutions) { // If the symbol does not have external references or it is not prevailing, // then not need to mark it as exported from a ThinLTO partition. Index: lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp =================================================================== --- lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -417,34 +417,55 @@ } } -// Returns whether this module needs to be split because splitting is -// enabled and it uses type metadata. -bool requiresSplit(Module &M) { - // First check if the LTO Unit splitting has been enabled. +// Check if the LTO Unit splitting has been enabled. +bool enableSplitLTOUnit(Module &M) { bool EnableSplitLTOUnit = false; if (auto *MD = mdconst::extract_or_null( M.getModuleFlag("EnableSplitLTOUnit"))) EnableSplitLTOUnit = MD->getZExtValue(); - if (!EnableSplitLTOUnit) - return false; + return EnableSplitLTOUnit; +} - // Module only needs to be split if it contains type metadata. +// Returns whether this module needs to be split because it uses type metadata. +bool hasTypeMetadata(Module &M) { for (auto &GO : M.global_objects()) { if (GO.hasMetadata(LLVMContext::MD_type)) return true; } - return false; } void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS, function_ref AARGetter, Module &M, const ModuleSummaryIndex *Index) { - // Split module if splitting is enabled and it contains any type metadata. - if (requiresSplit(M)) - return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M); + std::unique_ptr NewIndex = nullptr; + // See if this module has any type metadata. If so, we try to split it + // or at least promote type ids to enable WPD. + if (hasTypeMetadata(M)) { + if (enableSplitLTOUnit(M)) + return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M); + else { + // Promote type ids as needed for index-based WPD. + std::string ModuleId = getUniqueModuleId(&M); + if (!ModuleId.empty()) { + promoteTypeIds(M, ModuleId); + // Need to rebuild the index so that it contains type metadata + // for the newly promoted type ids. + // FIXME: Probably should not bother building the index at all + // in the caller of writeThinLTOBitcode (which does so via the + // ModuleSummaryIndexAnalysis pass), since we have to rebuild it + // anyway whenever there is type metadata (here or in + // splitAndWriteThinLTOBitcode). Just always build it once via the + // buildModuleSummaryIndex when Module(s) are ready. + ProfileSummaryInfo PSI(M); + NewIndex = llvm::make_unique( + buildModuleSummaryIndex(M, nullptr, &PSI)); + Index = NewIndex.get(); + } + } + } - // Otherwise we can just write it out as a regular module. + // Write it out as an unsplit ThinLTO module. 
// Save the module hash produced for the full bitcode, which will // be used in the backends, and use that in the minimized bitcode Index: lib/Transforms/IPO/WholeProgramDevirt.cpp =================================================================== --- lib/Transforms/IPO/WholeProgramDevirt.cpp +++ lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -24,12 +24,14 @@ // returns 0, or a single vtable's function returns 1, replace each virtual // call with a comparison of the vptr against that vtable's address. // -// This pass is intended to be used during the regular and thin LTO pipelines. +// This pass is intended to be used during the regular and thin LTO pipelines: +// // During regular LTO, the pass determines the best optimization for each // virtual call and applies the resolutions directly to virtual calls that are // eligible for virtual call optimization (i.e. calls that use either of the -// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). During -// ThinLTO, the pass operates in two phases: +// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). +// +// During hybrid Regular/ThinLTO, the pass operates in two phases: // - Export phase: this is run during the thin link over a single merged module // that contains all vtables with !type metadata that participate in the link. // The pass computes a resolution for each virtual call and stores it in the @@ -38,6 +40,14 @@ // modules. The pass applies the resolutions previously computed during the // import phase to each eligible virtual call. // +// During ThinLTO, the pass operates in two phases: +// - Export phase: this is run during the thin link over the index which +// contains a summary of all vtables with !type metadata that participate in +// the link. It computes a resolution for each virtual call and stores it in +// the type identifier summary. Only single implementation devirtualization +// is supported. +// - Import phase: (same as with hybrid case above). +// //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/WholeProgramDevirt.h" @@ -117,6 +127,11 @@ cl::desc("Maximum number of call targets per " "call site to enable branch funnels")); +static cl::opt + PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden, + cl::init(false), cl::ZeroOrMore, + cl::desc("Print index-based devirtualization messages")); + // Find the minimum offset that we may store a value of size Size bits at. If // IsAfter is set, look for an offset before the object, otherwise look for an // offset after the object. @@ -265,6 +280,25 @@ } }; +template <> struct DenseMapInfo { + static VTableSlotSummary getEmptyKey() { + return {DenseMapInfo::getEmptyKey(), + DenseMapInfo::getEmptyKey()}; + } + static VTableSlotSummary getTombstoneKey() { + return {DenseMapInfo::getTombstoneKey(), + DenseMapInfo::getTombstoneKey()}; + } + static unsigned getHashValue(const VTableSlotSummary &I) { + return DenseMapInfo::getHashValue(I.TypeID) ^ + DenseMapInfo::getHashValue(I.ByteOffset); + } + static bool isEqual(const VTableSlotSummary &LHS, + const VTableSlotSummary &RHS) { + return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset; + } +}; + } // end namespace llvm namespace { @@ -342,6 +376,7 @@ /// pass the vector is non-empty, we will need to add a use of llvm.type.test /// to each of the function summaries in the vector. 
   std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
+  std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;
 
   bool isExported() const {
     return SummaryHasTypeTestAssumeUsers ||
@@ -358,6 +393,11 @@
     AllCallSitesDevirted = false;
   }
 
+  void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
+    SummaryTypeTestAssumeUsers.push_back(FS);
+    markSummaryHasTypeTestAssumeUsers();
+  }
+
   void markDevirt() {
     AllCallSitesDevirted = true;
 
@@ -542,6 +582,38 @@
       function_ref<DominatorTree &(Function &)> LookupDomTree);
 };
 
+struct DevirtIndex {
+  ModuleSummaryIndex &ExportSummary;
+  // The set in which to record GUIDs exported from their module by
+  // devirtualization, used by client to ensure they are not internalized.
+  std::set<GlobalValue::GUID> &ExportedGUIDs;
+  // A map in which to record the information necessary to locate the WPD
+  // resolution for local targets in case they are exported by cross module
+  // importing.
+  std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;
+
+  MapVector<VTableSlotSummary, VTableSlotInfo> CallSlots;
+
+  DevirtIndex(
+      ModuleSummaryIndex &ExportSummary,
+      std::set<GlobalValue::GUID> &ExportedGUIDs,
+      std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
+      : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
+        LocalWPDTargetsMap(LocalWPDTargetsMap) {}
+
+  bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
+                                 const TypeIdGVInfo TypeIdGVInfo,
+                                 uint64_t ByteOffset);
+
+  bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
+                           VTableSlotSummary &SlotSummary,
+                           VTableSlotInfo &SlotInfo,
+                           WholeProgramDevirtResolution *Res,
+                           std::set<ValueInfo> &DevirtTargets);
+
+  void run();
+};
+
 struct WholeProgramDevirt : public ModulePass {
   static char ID;
 
@@ -632,6 +704,43 @@
   return PreservedAnalyses::none();
 }
 
+namespace llvm {
+void runWholeProgramDevirtOnIndex(
+    ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
+    std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
+  DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
+}
+
+void updateIndexWPDForExports(
+    ModuleSummaryIndex &Summary,
+    StringMap<FunctionImporter::ExportSetTy> &ExportLists,
+    std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
+  for (auto &T : LocalWPDTargetsMap) {
+    auto &VI = T.first;
+    // This was enforced earlier during trySingleImplDevirt.
+    assert(VI.getSummaryList().size() == 1 &&
+           "Devirt of local target has more than one copy");
+    auto &S = VI.getSummaryList()[0];
+    const auto &ExportList = ExportLists.find(S->modulePath());
+    if (ExportList == ExportLists.end() ||
+        !ExportList->second.count(VI.getGUID()))
+      continue;
+
+    // It's been exported by a cross module import.
+    for (auto &SlotSummary : T.second) {
+      auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
+      assert(TIdSum);
+      auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
+      assert(WPDRes != TIdSum->WPDRes.end());
+      WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
+          WPDRes->second.SingleImplName,
+          Summary.getModuleHash(S->modulePath()));
+    }
+  }
+}
+
+} // end namespace llvm
+
 bool DevirtModule::runForTesting(
     Module &M, function_ref<AAResults &(Function &)> AARGetter,
     function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
@@ -766,6 +875,28 @@
   return !TargetsForSlot.empty();
 }
 
+bool DevirtIndex::tryFindVirtualCallTargets(
+    std::vector<ValueInfo> &TargetsForSlot, const TypeIdGVInfo TypeIdGVInfo,
+    uint64_t ByteOffset) {
+  for (const TypeIdOffsetGVPair P : TypeIdGVInfo) {
+    // VTable initializer should have only one summary (i.e. not be
+    // linkonce/weak).
+ assert(P.second.getSummaryList().size() == 1); + const auto *VS = cast(P.second.getSummaryList()[0].get()); + if (!P.second.getSummaryList()[0]->isLive()) + continue; + for (auto VTP : VS->vTableFuncs()) { + if (VTP.second != P.first + ByteOffset) + continue; + + TargetsForSlot.push_back(VTP.first); + } + } + + // Give up if we couldn't find any targets. + return !TargetsForSlot.empty(); +} + void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo, Constant *TheFn, bool &IsExported) { auto Apply = [&](CallSiteInfo &CSInfo) { @@ -837,6 +968,83 @@ return true; } +bool DevirtIndex::trySingleImplDevirt(MutableArrayRef TargetsForSlot, + VTableSlotSummary &SlotSummary, + VTableSlotInfo &SlotInfo, + WholeProgramDevirtResolution *Res, + std::set &DevirtTargets) { + // See if the program contains a single implementation of this virtual + // function. + auto TheFn = TargetsForSlot[0]; + for (auto &&Target : TargetsForSlot) + if (TheFn != Target) + return false; + + // Don't devirtualize if we don't have target definition. + auto Size = TheFn.getSummaryList().size(); + if (!Size) + return false; + + // If the summary list contains multiple summaries where at least one is + // a local, give up, as we won't know which (possibly promoted) name to use. + for (auto &S : TheFn.getSummaryList()) + if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1) + return false; + + // Collect functions devirtualized at least for one call site for stats. + if (PrintSummaryDevirt) + DevirtTargets.insert(TheFn); + + auto &S = TheFn.getSummaryList()[0]; + bool IsExported = false; + + // Insert calls into the summary index so that the devirtualized targets + // are eligible for import. + // FIXME: Annotate type tests with hotness. For now, mark these as hot + // to better ensure we have the opportunity to inline them. + CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0); + auto AddCalls = [&](CallSiteInfo &CSInfo) { + for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) { + FS->addCall({TheFn, CI}); + IsExported |= S->modulePath() != FS->modulePath(); + } + for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) { + FS->addCall({TheFn, CI}); + IsExported |= S->modulePath() != FS->modulePath(); + } + }; + AddCalls(SlotInfo.CSInfo); + for (auto &P : SlotInfo.ConstCSInfo) + AddCalls(P.second); + + if (IsExported) + ExportedGUIDs.insert(TheFn.getGUID()); + + // Record in summary for use in devirtualization during the ThinLTO import + // step. + Res->TheKind = WholeProgramDevirtResolution::SingleImpl; + if (GlobalValue::isLocalLinkage(S->linkage())) { + if (IsExported) + // If target is a local function and we are exporting it by + // devirtualizing a call in another module, we need to record the + // promoted name. + Res->SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal( + TheFn.name(), ExportSummary.getModuleHash(S->modulePath())); + else { + LocalWPDTargetsMap[TheFn].push_back(SlotSummary); + Res->SingleImplName = TheFn.name(); + } + } else + Res->SingleImplName = TheFn.name(); + + // Name will be empty if this thin link driven off of serialized combined + // index (e.g. llvm-lto). However, WPD is not supported/invoked for the + // legacy LTO API anyway. 
+ assert(!Res->SingleImplName.empty()); + + return true; +} + void DevirtModule::tryICallBranchFunnel( MutableArrayRef TargetsForSlot, VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res, VTableSlot Slot) { @@ -1482,8 +1690,11 @@ } void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) { + auto *TypeId = dyn_cast(Slot.TypeID); + if (!TypeId) + return; const TypeIdSummary *TidSummary = - ImportSummary->getTypeIdSummary(cast(Slot.TypeID)->getString()); + ImportSummary->getTypeIdSummary(TypeId->getString()); if (!TidSummary) return; auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset); @@ -1492,6 +1703,7 @@ const WholeProgramDevirtResolution &Res = ResI->second; if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) { + assert(!Res.SingleImplName.empty()); // The type of the function in the declaration is irrelevant because every // call site will cast it to the correct type. Constant *SingleImpl = @@ -1712,7 +1924,7 @@ using namespace ore; OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F) << "devirtualized " - << NV("FunctionName", F->getName())); + << NV("FunctionName", DT.first)); } } @@ -1726,3 +1938,78 @@ return true; } + +void DevirtIndex::run() { + if (ExportSummary.typeIdMetadataMap().empty()) + return; + + DenseMap> NameByGUID; + for (auto &P : ExportSummary.typeIdMetadataMap()) { + NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first); + } + + // Collect information from summary about which calls to try to devirtualize. + for (auto &P : ExportSummary) { + for (auto &S : P.second.SummaryList) { + auto *FS = dyn_cast(S.get()); + if (!FS) + continue; + // FIXME: Only add live functions. + for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) { + for (StringRef Name : NameByGUID[VF.GUID]) { + CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS); + } + } + for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) { + for (StringRef Name : NameByGUID[VF.GUID]) { + CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS); + } + } + for (const FunctionSummary::ConstVCall &VC : + FS->type_test_assume_const_vcalls()) { + for (StringRef Name : NameByGUID[VC.VFunc.GUID]) { + CallSlots[{Name, VC.VFunc.Offset}] + .ConstCSInfo[VC.Args] + .addSummaryTypeTestAssumeUser(FS); + } + } + for (const FunctionSummary::ConstVCall &VC : + FS->type_checked_load_const_vcalls()) { + for (StringRef Name : NameByGUID[VC.VFunc.GUID]) { + CallSlots[{Name, VC.VFunc.Offset}] + .ConstCSInfo[VC.Args] + .addSummaryTypeCheckedLoadUser(FS); + } + } + } + } + + std::set DevirtTargets; + // For each (type, offset) pair: + for (auto &S : CallSlots) { + // Search each of the members of the type identifier for the virtual + // function implementation at offset S.first.ByteOffset, and add to + // TargetsForSlot. + std::vector TargetsForSlot; + auto *TidSummary = ExportSummary.getTypeIdMetadataSummary(S.first.TypeID); + assert(TidSummary); + if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary, + S.first.ByteOffset)) { + WholeProgramDevirtResolution *Res = + &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID) + .WPDRes[S.first.ByteOffset]; + + if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res, + DevirtTargets)) + continue; + } + } + + // Optionally have the thin link print message for each devirtualized + // function. 
+ if (PrintSummaryDevirt) + for (const auto &DT : DevirtTargets) + errs() << "Devirtualized call to " << DT << "\n"; + + return; +} Index: test/Assembler/thinlto-vtable-summary.ll =================================================================== --- /dev/null +++ test/Assembler/thinlto-vtable-summary.ll @@ -0,0 +1,38 @@ +; Test summary parsing of index-based WPD related summary fields +; RUN: llvm-as %s -o - | llvm-dis -o %t.ll +; RUN: grep "^\^" %s >%t2 +; RUN: grep "^\^" %t.ll >%t3 +; Expect that the summary information is the same after round-trip through +; llvm-as and llvm-dis. +; RUN: diff %t2 %t3 + +source_filename = "thinlto-vtable-summary.ll" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } +%struct.C = type { %struct.A } + +@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1 +@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2 + +declare i32 @_ZN1B1fEi(%struct.B*, i32) + +declare i32 @_ZN1A1nEi(%struct.A*, i32) + +declare i32 @_ZN1C1fEi(%struct.C*, i32) + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTS1B"} +!2 = !{i64 16, !"_ZTS1C"} + +^0 = module: (path: "", hash: (0, 0, 0, 0, 0)) +^1 = gv: (name: "_ZN1A1nEi") ; guid = 1621563287929432257 +^2 = gv: (name: "_ZTV1B", summaries: (variable: (module: ^0, flags: (linkage: external, notEligibleToImport: 0, live: 0, dsoLocal: 0), varFlags: (readonly: 0), vTableFuncs: ((virtFunc: ^3, offset: 16), (virtFunc: ^1, offset: 24)), refs: (^3, ^1)))) ; guid = 5283576821522790367 +^3 = gv: (name: "_ZN1B1fEi") ; guid = 7162046368816414394 +^4 = gv: (name: "_ZTV1C", summaries: (variable: (module: ^0, flags: (linkage: external, notEligibleToImport: 0, live: 0, dsoLocal: 0), varFlags: (readonly: 0), vTableFuncs: ((virtFunc: ^5, offset: 16), (virtFunc: ^1, offset: 24)), refs: (^1, ^5)))) ; guid = 13624023785555846296 +^5 = gv: (name: "_ZN1C1fEi") ; guid = 14876272565662207556 +^6 = typeidMetadata: (name: "_ZTS1A", summary: ((offset: 16, ^2), (offset: 16, ^4))) ; guid = 7004155349499253778 +^7 = typeidMetadata: (name: "_ZTS1B", summary: ((offset: 16, ^2))) ; guid = 6203814149063363976 +^8 = typeidMetadata: (name: "_ZTS1C", summary: ((offset: 16, ^4))) ; guid = 1884921850105019584 Index: test/ThinLTO/X86/Inputs/devirt2.ll =================================================================== --- /dev/null +++ test/ThinLTO/X86/Inputs/devirt2.ll @@ -0,0 +1,59 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-grtev4-linux-gnu" + +%struct.A = type { i32 (...)** } +%struct.B = type { %struct.A } +%struct.C = type { %struct.A } +%struct.D = type { i32 (...)** } +%struct.E = type { i32 (...)** } + +@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1 +@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2 +@_ZTV1D = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi 
to i8*)] }, !type !3 +@_ZTV1E = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.E*, i32)* @_ZN1E1mEi to i8*)] }, !type !4 + +define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 { + ret i32 0; +} + +define internal i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 { + ret i32 0; +} + +define internal i32 @_ZN1E1mEi(%struct.E* %this, i32 %a) #0 { + ret i32 0; +} + +define i32 @test2(%struct.E* %obj, i32 %a) { +entry: + %0 = bitcast %struct.E* %obj to i8*** + %vtable2 = load i8**, i8*** %0 + %1 = bitcast i8** %vtable2 to i8* + %p2 = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1E") + call void @llvm.assume(i1 %p2) + + %2 = bitcast i8** %vtable2 to i32 (%struct.E*, i32)** + %fptr33 = load i32 (%struct.E*, i32)*, i32 (%struct.E*, i32)** %2, align 8 + + %call4 = tail call i32 %fptr33(%struct.E* nonnull %obj, i32 %a) + ret i32 %call4 +} + +attributes #0 = { noinline optnone } + +declare i1 @llvm.type.test(i8*, metadata) +declare void @llvm.assume(i1) + +!0 = !{i64 16, !"_ZTS1A"} +!1 = !{i64 16, !"_ZTS1B"} +!2 = !{i64 16, !"_ZTS1C"} +!3 = !{i64 16, !"_ZTS1D"} +!4 = !{i64 16, !"_ZTS1E"} Index: test/ThinLTO/X86/devirt.ll =================================================================== --- /dev/null +++ test/ThinLTO/X86/devirt.ll @@ -0,0 +1,184 @@ +; REQUIRES: x86-registered-target + +; Test devirtualization through the thin link and backend. + +; Generate split module with summary for hybrid Thin/Regular LTO WPD. +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t.o %s + +; Check that we have module flag showing splitting enabled, and that we don't +; generate summary information needed for index-based WPD. +; RUN: llvm-modextract -b -n=0 %t.o -o %t.o.0 +; RUN: llvm-dis -o - %t.o.0 | FileCheck %s --check-prefix=ENABLESPLITFLAG --implicit-check-not=vTableFuncs --implicit-check-not=typeidMetadata +; RUN: llvm-modextract -b -n=1 %t.o -o %t.o.1 +; RUN: llvm-dis -o - %t.o.1 | FileCheck %s --check-prefix=ENABLESPLITFLAG --implicit-check-not=vTableFuncs --implicit-check-not=typeidMetadata +; ENABLESPLITFLAG: !{i32 1, !"EnableSplitLTOUnit", i32 1} + +; Generate unsplit module with summary for ThinLTO index-based WPD. +; RUN: opt -thinlto-bc -o %t2.o %s + +; Check that we don't have module flag when splitting not enabled for ThinLTO, +; and that we generate summary information needed for index-based WPD. 
+; RUN: llvm-dis -o - %t2.o | FileCheck %s --check-prefix=NOENABLESPLITFLAG +; NOENABLESPLITFLAG-DAG: !{i32 1, !"EnableSplitLTOUnit", i32 0} +; NOENABLESPLITFLAG-DAG: [[An:\^[0-9]+]] = gv: (name: "_ZN1A1nEi" +; NOENABLESPLITFLAG-DAG: [[Bf:\^[0-9]+]] = gv: (name: "_ZN1B1fEi" +; NOENABLESPLITFLAG-DAG: [[Cf:\^[0-9]+]] = gv: (name: "_ZN1C1fEi" +; NOENABLESPLITFLAG-DAG: [[Dm:\^[0-9]+]] = gv: (name: "_ZN1D1mEi" +; NOENABLESPLITFLAG-DAG: [[B:\^[0-9]+]] = gv: (name: "_ZTV1B", {{.*}} vTableFuncs: ((virtFunc: [[Bf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[Bf]], [[An]]) +; NOENABLESPLITFLAG-DAG: [[C:\^[0-9]+]] = gv: (name: "_ZTV1C", {{.*}} vTableFuncs: ((virtFunc: [[Cf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[An]], [[Cf]]) +; NOENABLESPLITFLAG-DAG: [[D:\^[0-9]+]] = gv: (name: "_ZTV1D", {{.*}} vTableFuncs: ((virtFunc: [[Dm]], offset: 16)), refs: ([[Dm]]) +; NOENABLESPLITFLAG-DAG: typeidMetadata: (name: "_ZTS1A", summary: ((offset: 16, [[B]]), (offset: 16, [[C]]))) +; NOENABLESPLITFLAG-DAG: typeidMetadata: (name: "_ZTS1B", summary: ((offset: 16, [[B]]))) +; NOENABLESPLITFLAG-DAG: typeidMetadata: (name: "_ZTS1C", summary: ((offset: 16, [[C]]))) +; Type Id on _ZTV1D should have been promoted +; NOENABLESPLITFLAG-DAG: typeidMetadata: (name: "1${{.*}}", summary: ((offset: 16, [[D]]))) + +; Legacy PM, Index based WPD +; RUN: llvm-lto2 run %t2.o -save-temps -pass-remarks=. \ +; RUN: -o %t3 \ +; RUN: -r=%t2.o,test,px \ +; RUN: -r=%t2.o,_ZN1A1nEi,p \ +; RUN: -r=%t2.o,_ZN1B1fEi,p \ +; RUN: -r=%t2.o,_ZN1C1fEi,p \ +; RUN: -r=%t2.o,_ZN1D1mEi,p \ +; RUN: -r=%t2.o,_ZTV1B,px \ +; RUN: -r=%t2.o,_ZTV1C,px \ +; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; New PM, Index based WPD +; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \ +; RUN: -o %t3 \ +; RUN: -r=%t2.o,test,px \ +; RUN: -r=%t2.o,_ZN1A1nEi,p \ +; RUN: -r=%t2.o,_ZN1B1fEi,p \ +; RUN: -r=%t2.o,_ZN1C1fEi,p \ +; RUN: -r=%t2.o,_ZN1D1mEi,p \ +; RUN: -r=%t2.o,_ZTV1B,px \ +; RUN: -r=%t2.o,_ZTV1C,px \ +; RUN: -r=%t2.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; Legacy PM +; RUN: llvm-lto2 run %t.o -save-temps -pass-remarks=. \ +; RUN: -o %t3 \ +; RUN: -r=%t.o,test,px \ +; RUN: -r=%t.o,_ZN1A1nEi,p \ +; RUN: -r=%t.o,_ZN1B1fEi,p \ +; RUN: -r=%t.o,_ZN1C1fEi,p \ +; RUN: -r=%t.o,_ZN1D1mEi,p \ +; RUN: -r=%t.o,_ZTV1B, \ +; RUN: -r=%t.o,_ZTV1C, \ +; RUN: -r=%t.o,_ZTV1D, \ +; RUN: -r=%t.o,_ZN1A1nEi, \ +; RUN: -r=%t.o,_ZN1B1fEi, \ +; RUN: -r=%t.o,_ZN1C1fEi, \ +; RUN: -r=%t.o,_ZN1D1mEi, \ +; RUN: -r=%t.o,_ZTV1B,px \ +; RUN: -r=%t.o,_ZTV1C,px \ +; RUN: -r=%t.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK +; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR + +; New PM +; RUN: llvm-lto2 run %t.o -save-temps -use-new-pm -pass-remarks=. 
+; RUN: -o %t3 \
+; RUN: -r=%t.o,test,px \
+; RUN: -r=%t.o,_ZN1A1nEi,p \
+; RUN: -r=%t.o,_ZN1B1fEi,p \
+; RUN: -r=%t.o,_ZN1C1fEi,p \
+; RUN: -r=%t.o,_ZN1D1mEi,p \
+; RUN: -r=%t.o,_ZTV1B, \
+; RUN: -r=%t.o,_ZTV1C, \
+; RUN: -r=%t.o,_ZTV1D, \
+; RUN: -r=%t.o,_ZN1A1nEi, \
+; RUN: -r=%t.o,_ZN1B1fEi, \
+; RUN: -r=%t.o,_ZN1C1fEi, \
+; RUN: -r=%t.o,_ZN1D1mEi, \
+; RUN: -r=%t.o,_ZTV1B,px \
+; RUN: -r=%t.o,_ZTV1C,px \
+; RUN: -r=%t.o,_ZTV1D,px 2>&1 | FileCheck %s --check-prefix=REMARK
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+
+; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi
+; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.A = type { i32 (...)** }
+%struct.B = type { %struct.A }
+%struct.C = type { %struct.A }
+%struct.D = type { i32 (...)** }
+
+@_ZTV1B = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.B*, i32)* @_ZN1B1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !1
+@_ZTV1C = constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.C*, i32)* @_ZN1C1fEi to i8*), i8* bitcast (i32 (%struct.A*, i32)* @_ZN1A1nEi to i8*)] }, !type !0, !type !2
+@_ZTV1D = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3
+
+
+; CHECK-IR-LABEL: define i32 @test
+define i32 @test(%struct.A* %obj, %struct.D* %obj2, i32 %a) {
+entry:
+  %0 = bitcast %struct.A* %obj to i8***
+  %vtable = load i8**, i8*** %0
+  %1 = bitcast i8** %vtable to i8*
+  %p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A")
+  call void @llvm.assume(i1 %p)
+  %fptrptr = getelementptr i8*, i8** %vtable, i32 1
+  %2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)**
+  %fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8
+
+  ; Check that the call was devirtualized.
+  ; CHECK-IR: %call = tail call i32 @_ZN1A1nEi
+  %call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a)
+
+  %3 = bitcast i8** %vtable to i32 (%struct.A*, i32)**
+  %fptr22 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %3, align 8
+
+  ; We still have to call it as virtual.
+  ; CHECK-IR: %call3 = tail call i32 %fptr22
+  %call3 = tail call i32 %fptr22(%struct.A* nonnull %obj, i32 %call)
+
+  %4 = bitcast %struct.D* %obj2 to i8***
+  %vtable2 = load i8**, i8*** %4
+  %5 = bitcast i8** %vtable2 to i8*
+  %p2 = call i1 @llvm.type.test(i8* %5, metadata !4)
+  call void @llvm.assume(i1 %p2)
+
+  %6 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)**
+  %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %6, align 8
+
+  ; Check that the call was devirtualized.
+  ; CHECK-IR: %call4 = tail call i32 @_ZN1D1mEi
+  %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %call3)
+  ret i32 %call4
+}
+; CHECK-IR-LABEL: ret i32
+; CHECK-IR-LABEL: }
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 {
+  ret i32 0;
+}
+
+define i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 {
+  ret i32 0;
+}
+
+define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 {
+  ret i32 0;
+}
+
+define i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 {
+  ret i32 0;
+}
+
+; Make sure we don't inline or otherwise optimize out the direct calls.
+attributes #0 = { noinline optnone }
+
+!0 = !{i64 16, !"_ZTS1A"}
+!1 = !{i64 16, !"_ZTS1B"}
+!2 = !{i64 16, !"_ZTS1C"}
+!3 = !{i64 16, !4}
+!4 = distinct !{}
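The vTableFuncs and typeidMetadata entries checked above are the per-module summary pieces that index-based WPD consumes during the thin link. The sketch below is not code from this patch: the helper name printSlotCandidates is made up, and it assumes the (offset, vtable) and (virtFunc, offset) pair orderings that the summary assembly above prints. It only illustrates how those entries can be walked to list the candidate targets for one vtable slot.

// Minimal sketch: enumerate possible targets for the slot (TypeId, SlotOffset),
// where SlotOffset is the byte offset of the loaded entry from the address
// point, using only summary information.
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void printSlotCandidates(const ModuleSummaryIndex &Index,
                                StringRef TypeId, uint64_t SlotOffset) {
  // Vtables decorated with this type id, recorded by the per-module analysis.
  const TypeIdGVInfo *Info = Index.getTypeIdMetadataSummary(TypeId);
  if (!Info)
    return; // Nothing summarized for this type id (e.g. an unpromotable local).
  for (const TypeIdOffsetGVPair &P : *Info) {
    uint64_t AddressPointOffset = P.first; // offset stored in the !type MD (assumed ordering)
    ValueInfo VTableVI = P.second;         // the vtable definition (assumed ordering)
    for (const std::unique_ptr<GlobalValueSummary> &S :
         VTableVI.getSummaryList()) {
      auto *GVS = dyn_cast<GlobalVarSummary>(S->getBaseObject());
      if (!GVS)
        continue;
      // A virtual call through this slot lands on the vtable entry at
      // AddressPointOffset + SlotOffset from the start of the initializer.
      for (const VirtFuncOffsetPair &VF : GVS->vTableFuncs())
        if (VF.second == AddressPointOffset + SlotOffset)
          errs() << "candidate: " << VF.first.name() << "\n";
    }
  }
}

Index-based WPD performs essentially this mapping for every type test recorded in the function summaries, and only rewrites a call when all candidates resolve to a single implementation.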
Index: test/ThinLTO/X86/devirt2.ll
===================================================================
--- /dev/null
+++ test/ThinLTO/X86/devirt2.ll
@@ -0,0 +1,259 @@
+; REQUIRES: x86-registered-target
+
+; Test devirtualization requiring promotion of local targets.
+
+; Generate split module with summary for hybrid Thin/Regular LTO WPD.
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t1.o %s
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t2.o %p/Inputs/devirt2.ll
+
+; Check that we have module flag showing splitting enabled, and that we don't
+; generate summary information needed for index-based WPD.
+; RUN: llvm-modextract -b -n=0 %t2.o -o %t2.o.0
+; RUN: llvm-dis -o - %t2.o.0 | FileCheck %s --check-prefix=ENABLESPLITFLAG --implicit-check-not=vTableFuncs --implicit-check-not=typeidMetadata
+; RUN: llvm-modextract -b -n=1 %t2.o -o %t2.o.1
+; RUN: llvm-dis -o - %t2.o.1 | FileCheck %s --check-prefix=ENABLESPLITFLAG --implicit-check-not=vTableFuncs --implicit-check-not=typeidMetadata
+; ENABLESPLITFLAG: !{i32 1, !"EnableSplitLTOUnit", i32 1}
+
+; Generate unsplit module with summary for ThinLTO index-based WPD.
+; RUN: opt -thinlto-bc -o %t3.o %s
+; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt2.ll
+
+; Check that the EnableSplitLTOUnit module flag is 0 when splitting is not
+; enabled for ThinLTO, and that we generate the summary information needed
+; for index-based WPD.
+; RUN: llvm-dis -o - %t4.o | FileCheck %s --check-prefix=NOENABLESPLITFLAG
+; NOENABLESPLITFLAG-DAG: !{i32 1, !"EnableSplitLTOUnit", i32 0}
+; NOENABLESPLITFLAG-DAG: [[An:\^[0-9]+]] = gv: (name: "_ZN1A1nEi"
+; NOENABLESPLITFLAG-DAG: [[Bf:\^[0-9]+]] = gv: (name: "_ZN1B1fEi"
+; NOENABLESPLITFLAG-DAG: [[Cf:\^[0-9]+]] = gv: (name: "_ZN1C1fEi"
+; NOENABLESPLITFLAG-DAG: [[Dm:\^[0-9]+]] = gv: (name: "_ZN1D1mEi"
+; NOENABLESPLITFLAG-DAG: [[B:\^[0-9]+]] = gv: (name: "_ZTV1B", {{.*}} vTableFuncs: ((virtFunc: [[Bf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[Bf]], [[An]])
+; NOENABLESPLITFLAG-DAG: [[C:\^[0-9]+]] = gv: (name: "_ZTV1C", {{.*}} vTableFuncs: ((virtFunc: [[Cf]], offset: 16), (virtFunc: [[An]], offset: 24)), refs: ([[An]], [[Cf]])
+; NOENABLESPLITFLAG-DAG: [[D:\^[0-9]+]] = gv: (name: "_ZTV1D", {{.*}} vTableFuncs: ((virtFunc: [[Dm]], offset: 16)), refs: ([[Dm]])
+; NOENABLESPLITFLAG-DAG: typeidMetadata: (name: "_ZTS1A", summary: ((offset: 16, [[B]]), (offset: 16, [[C]])))
+; NOENABLESPLITFLAG-DAG: typeidMetadata: (name: "_ZTS1B", summary: ((offset: 16, [[B]])))
+; NOENABLESPLITFLAG-DAG: typeidMetadata: (name: "_ZTS1C", summary: ((offset: 16, [[C]])))
+; NOENABLESPLITFLAG-DAG: typeidMetadata: (name: "_ZTS1D", summary: ((offset: 16, [[D]])))
+
+; Legacy PM, Index based WPD
+; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -pass-remarks=. \
+; RUN: -wholeprogramdevirt-print-index-based \
+; RUN: -o %t5 \
+; RUN: -r=%t3.o,test,px \
+; RUN: -r=%t3.o,_ZTV1B, \
+; RUN: -r=%t3.o,_ZTV1C, \
+; RUN: -r=%t3.o,_ZTV1D, \
+; RUN: -r=%t3.o,test2, \
+; RUN: -r=%t4.o,_ZN1B1fEi,p \
+; RUN: -r=%t4.o,_ZN1C1fEi,p \
+; RUN: -r=%t4.o,_ZN1D1mEi,p \
+; RUN: -r=%t4.o,test2,px \
+; RUN: -r=%t4.o,_ZTV1B,px \
+; RUN: -r=%t4.o,_ZTV1C,px \
+; RUN: -r=%t4.o,_ZTV1D,px \
+; RUN: -r=%t4.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=REMARK --check-prefix=PRINT
+; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1
+; RUN: llvm-dis %t5.2.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2
+; RUN: nm %t5.1 | FileCheck %s --check-prefix=NM-INDEX1
+; RUN: nm %t5.2 | FileCheck %s --check-prefix=NM-INDEX2
+
+; New PM, Index based WPD
+; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm -pass-remarks=. \
+; RUN: -wholeprogramdevirt-print-index-based \
+; RUN: -o %t5 \
+; RUN: -r=%t3.o,test,px \
+; RUN: -r=%t3.o,_ZTV1B, \
+; RUN: -r=%t3.o,_ZTV1C, \
+; RUN: -r=%t3.o,_ZTV1D, \
+; RUN: -r=%t3.o,test2, \
+; RUN: -r=%t4.o,_ZN1B1fEi,p \
+; RUN: -r=%t4.o,_ZN1C1fEi,p \
+; RUN: -r=%t4.o,_ZN1D1mEi,p \
+; RUN: -r=%t4.o,test2,px \
+; RUN: -r=%t4.o,_ZTV1B,px \
+; RUN: -r=%t4.o,_ZTV1C,px \
+; RUN: -r=%t4.o,_ZTV1D,px \
+; RUN: -r=%t4.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=REMARK --check-prefix=PRINT
+; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1
+; RUN: llvm-dis %t5.2.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2
+; RUN: nm %t5.1 | FileCheck %s --check-prefix=NM-INDEX1
+; RUN: nm %t5.2 | FileCheck %s --check-prefix=NM-INDEX2
+
+; NM-INDEX1-DAG: U _ZN1A1nEi.llvm.
+; NM-INDEX1-DAG: U _ZN1E1mEi.llvm.
+; NM-INDEX1-DAG: U _ZN1D1mEi
+
+; NM-INDEX2-DAG: T _ZN1A1nEi.llvm.
+; NM-INDEX2-DAG: T _ZN1E1mEi.llvm.
+; NM-INDEX2-DAG: T _ZN1D1mEi
+; NM-INDEX2-DAG: t _ZN1B1fEi
+; NM-INDEX2-DAG: t _ZN1C1fEi
+
+; Index based WPD, distributed backends
+; RUN: llvm-lto2 run %t3.o %t4.o -save-temps -use-new-pm \
+; RUN: -thinlto-distributed-indexes -wholeprogramdevirt-print-index-based \
+; RUN: -o %t5 \
+; RUN: -r=%t3.o,test,px \
+; RUN: -r=%t3.o,_ZTV1B, \
+; RUN: -r=%t3.o,_ZTV1C, \
+; RUN: -r=%t3.o,_ZTV1D, \
+; RUN: -r=%t3.o,test2, \
+; RUN: -r=%t4.o,_ZN1B1fEi,p \
+; RUN: -r=%t4.o,_ZN1C1fEi,p \
+; RUN: -r=%t4.o,_ZN1D1mEi,p \
+; RUN: -r=%t4.o,test2,px \
+; RUN: -r=%t4.o,_ZTV1B,px \
+; RUN: -r=%t4.o,_ZTV1C,px \
+; RUN: -r=%t4.o,_ZTV1D,px \
+; RUN: -r=%t4.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=PRINT
+
+; PRINT-DAG: Devirtualized call to {{.*}} (_ZN1A1nEi)
+; PRINT-DAG: Devirtualized call to {{.*}} (_ZN1E1mEi)
+; PRINT-DAG: Devirtualized call to {{.*}} (_ZN1D1mEi)
+
+; Legacy PM
+; RUN: llvm-lto2 run %t1.o %t2.o -save-temps -pass-remarks=. \
+; RUN: -o %t5 \
+; RUN: -r=%t1.o,test,px \
+; RUN: -r=%t1.o,_ZTV1B, \
+; RUN: -r=%t1.o,_ZTV1C, \
+; RUN: -r=%t1.o,_ZTV1D, \
+; RUN: -r=%t1.o,test2, \
+; RUN: -r=%t2.o,_ZN1A1nEi,p \
+; RUN: -r=%t2.o,_ZN1B1fEi,p \
+; RUN: -r=%t2.o,_ZN1C1fEi,p \
+; RUN: -r=%t2.o,_ZN1D1mEi,p \
+; RUN: -r=%t2.o,_ZN1E1mEi,p \
+; RUN: -r=%t2.o,_ZTV1B, \
+; RUN: -r=%t2.o,_ZTV1C, \
+; RUN: -r=%t2.o,_ZTV1D, \
+; RUN: -r=%t2.o,_ZTV1E, \
+; RUN: -r=%t2.o,test2,px \
+; RUN: -r=%t2.o,_ZN1A1nEi, \
+; RUN: -r=%t2.o,_ZN1B1fEi, \
+; RUN: -r=%t2.o,_ZN1C1fEi, \
+; RUN: -r=%t2.o,_ZN1D1mEi, \
+; RUN: -r=%t2.o,_ZN1E1mEi, \
+; RUN: -r=%t2.o,_ZTV1B,px \
+; RUN: -r=%t2.o,_ZTV1C,px \
+; RUN: -r=%t2.o,_ZTV1D,px \
+; RUN: -r=%t2.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=REMARK
+; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1
+; RUN: llvm-dis %t5.2.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2
+; RUN: nm %t5.1 | FileCheck %s --check-prefix=NM-HYBRID1
+; RUN: nm %t5.2 | FileCheck %s --check-prefix=NM-HYBRID2
+
+; New PM
+; RUN: llvm-lto2 run %t1.o %t2.o -save-temps -use-new-pm -pass-remarks=. \
+; RUN: -o %t5 \
+; RUN: -r=%t1.o,test,px \
+; RUN: -r=%t1.o,_ZTV1B, \
+; RUN: -r=%t1.o,_ZTV1C, \
+; RUN: -r=%t1.o,_ZTV1D, \
+; RUN: -r=%t1.o,test2, \
+; RUN: -r=%t2.o,_ZN1A1nEi,p \
+; RUN: -r=%t2.o,_ZN1B1fEi,p \
+; RUN: -r=%t2.o,_ZN1C1fEi,p \
+; RUN: -r=%t2.o,_ZN1D1mEi,p \
+; RUN: -r=%t2.o,_ZN1E1mEi,p \
+; RUN: -r=%t2.o,_ZTV1B, \
+; RUN: -r=%t2.o,_ZTV1C, \
+; RUN: -r=%t2.o,_ZTV1D, \
+; RUN: -r=%t2.o,_ZTV1E, \
+; RUN: -r=%t2.o,test2,px \
+; RUN: -r=%t2.o,_ZN1A1nEi, \
+; RUN: -r=%t2.o,_ZN1B1fEi, \
+; RUN: -r=%t2.o,_ZN1C1fEi, \
+; RUN: -r=%t2.o,_ZN1D1mEi, \
+; RUN: -r=%t2.o,_ZN1E1mEi, \
+; RUN: -r=%t2.o,_ZTV1B,px \
+; RUN: -r=%t2.o,_ZTV1C,px \
+; RUN: -r=%t2.o,_ZTV1D,px \
+; RUN: -r=%t2.o,_ZTV1E,px 2>&1 | FileCheck %s --check-prefix=REMARK
+; RUN: llvm-dis %t5.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR1
+; RUN: llvm-dis %t5.2.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR2
+; RUN: nm %t5.1 | FileCheck %s --check-prefix=NM-HYBRID1
+; RUN: nm %t5.2 | FileCheck %s --check-prefix=NM-HYBRID2
+
+; NM-HYBRID1-DAG: U _ZN1A1nEi$
+; NM-HYBRID1-DAG: U _ZN1E1mEi$
+; NM-HYBRID1-DAG: U _ZN1D1mEi
+
+; NM-HYBRID2-DAG: T _ZN1A1nEi$
+; NM-HYBRID2-DAG: T _ZN1E1mEi$
+; NM-HYBRID2-DAG: T _ZN1D1mEi
+; NM-HYBRID2-DAG: T _ZN1B1fEi
+; NM-HYBRID2-DAG: T _ZN1C1fEi
+
+; REMARK-DAG: single-impl: devirtualized a call to _ZN1A1nEi
+; REMARK-DAG: single-impl: devirtualized a call to _ZN1D1mEi
+; We should devirt call to _ZN1E1mEi once in importing module and once
+; in original (exporting) module.
+; REMARK-DAG: single-impl: devirtualized a call to _ZN1E1mEi
+; REMARK-DAG: single-impl: devirtualized a call to _ZN1E1mEi
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.A = type { i32 (...)** }
+%struct.B = type { %struct.A }
+%struct.C = type { %struct.A }
+%struct.D = type { i32 (...)** }
+%struct.E = type { i32 (...)** }
+
+@_ZTV1B = external constant [4 x i8*]
+@_ZTV1C = external constant [4 x i8*]
+@_ZTV1D = external constant [3 x i8*]
+
+; CHECK-IR1-LABEL: define i32 @test
+define i32 @test(%struct.A* %obj, %struct.D* %obj2, %struct.E* %obj3, i32 %a) {
+entry:
+  %0 = bitcast %struct.A* %obj to i8***
+  %vtable = load i8**, i8*** %0
+  %1 = bitcast i8** %vtable to i8*
+  %p = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1A")
+  call void @llvm.assume(i1 %p)
+  %fptrptr = getelementptr i8*, i8** %vtable, i32 1
+  %2 = bitcast i8** %fptrptr to i32 (%struct.A*, i32)**
+  %fptr1 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %2, align 8
+
+  ; Check that the call was devirtualized. Ignore extra character before
+  ; symbol name which would happen if it was promoted during module
+  ; splitting for hybrid WPD.
+  ; CHECK-IR1: %call = tail call i32 bitcast (void ()* @{{.*}}_ZN1A1nEi
+  %call = tail call i32 %fptr1(%struct.A* nonnull %obj, i32 %a)
+
+  %3 = bitcast i8** %vtable to i32 (%struct.A*, i32)**
+  %fptr22 = load i32 (%struct.A*, i32)*, i32 (%struct.A*, i32)** %3, align 8
+
+  ; We still have to call it as virtual.
+  ; CHECK-IR1: %call3 = tail call i32 %fptr22
+  %call3 = tail call i32 %fptr22(%struct.A* nonnull %obj, i32 %call)
+
+  %4 = bitcast %struct.D* %obj2 to i8***
+  %vtable2 = load i8**, i8*** %4
+  %5 = bitcast i8** %vtable2 to i8*
+  %p2 = call i1 @llvm.type.test(i8* %5, metadata !"_ZTS1D")
+  call void @llvm.assume(i1 %p2)
+
+  %6 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)**
+  %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %6, align 8
+
+  ; Check that the call was devirtualized.
+  ; CHECK-IR1: %call4 = tail call i32 bitcast (void ()* @_ZN1D1mEi
+  %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 %call3)
+
+  %call5 = tail call i32 @test2(%struct.E* nonnull %obj3, i32 %call4)
+  ret i32 %call5
+}
+; CHECK-IR1-LABEL: ret i32
+; CHECK-IR1-LABEL: }
+
+; CHECK-IR2: define i32 @test2
+; CHECK-IR2-NEXT: entry:
+; Check that the call was devirtualized. Ignore extra character before
+; symbol name which would happen if it was promoted during module
+; splitting for hybrid WPD.
+; CHECK-IR2-NEXT: %call4 = tail call i32 @{{.*}}_ZN1E1mEi
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+declare i32 @test2(%struct.E* %obj, i32 %a)
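In the hybrid runs above, nm shows the promoted local targets with a "$" plus a hash appended (for example _ZN1A1nEi$...), while index-based promotion appends ".llvm." and a module hash. The following is a small, self-contained sketch of the matching idea that the llvm-lto2 change later in this patch relies on: when a resolution lookup by the full name fails, retry with the portion of the name before the last "$". The ResolutionMap type and findResolution helper are illustrative only, standing in for whatever resolution table a driver actually keeps.

#include "llvm/ADT/StringRef.h"
#include <map>
#include <string>

// Illustrative stand-in for a driver's symbol -> resolution table.
using ResolutionMap = std::map<std::string, std::string>;

// Look the name up directly; if that fails and the symbol carries a
// "$<hash>" promotion suffix, retry with everything before the last '$'.
static const std::string *findResolution(const ResolutionMap &Resolutions,
                                         llvm::StringRef Name) {
  auto It = Resolutions.find(Name.str());
  if (It == Resolutions.end()) {
    llvm::StringRef Base = Name.rsplit('$').first;
    It = Resolutions.find(Base.str());
  }
  return It == Resolutions.end() ? nullptr : &It->second;
}

For a name with no '$', rsplit returns the whole string as the first element, so the retry is just a harmless repeat of the original lookup.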
Index: test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll
===================================================================
--- /dev/null
+++ test/ThinLTO/X86/nodevirt-nonpromoted-typeid.ll
@@ -0,0 +1,66 @@
+; REQUIRES: x86-registered-target
+
+; Test that index-only devirtualization handles and ignores any
+; type metadata that could not be summarized (because it was internal
+; and could not be promoted due to the fact that the module has
+; no external symbols and therefore could not be assigned a unique
+; identifier). In this case we should simply not get the type
+; metadata summary entries, and no promotion will occur.
+
+; Generate unsplit module with summary for ThinLTO index-based WPD.
+; RUN: opt -thinlto-bc -thinlto-split-lto-unit=false -o %t2.o %s
+
+; Check that no type metadata summary entries were generated for the internal,
+; unpromotable type id (no typeIdInfo or typeidMetadata in the summary).
+; RUN: llvm-dis -o - %t2.o | FileCheck %s --check-prefix=DIS
+; DIS-NOT: typeIdInfo
+; DIS-NOT: typeidMetadata
+
+; Legacy PM, Index based WPD
+; RUN: llvm-lto2 run %t2.o -save-temps -pass-remarks=. \
+; RUN: -o %t3 \
+; RUN: -r=%t2.o,test,plx \
+; RUN: -r=%t2.o,_ZN1D1mEi,
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+
+; New PM, Index based WPD
+; RUN: llvm-lto2 run %t2.o -save-temps -use-new-pm -pass-remarks=. \
+; RUN: -o %t3 \
+; RUN: -r=%t2.o,test,plx \
+; RUN: -r=%t2.o,_ZN1D1mEi,
+; RUN: llvm-dis %t3.1.4.opt.bc -o - | FileCheck %s --check-prefix=CHECK-IR
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-grtev4-linux-gnu"
+
+%struct.D = type { i32 (...)** }
+
+@_ZTV1D = internal constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.D*, i32)* @_ZN1D1mEi to i8*)] }, !type !3
+
+; CHECK-IR-LABEL: define weak_odr dso_local i32 @test
+define weak_odr i32 @test(%struct.D* %obj2, i32 %a) {
+entry:
+  %0 = bitcast %struct.D* %obj2 to i8***
+  %vtable2 = load i8**, i8*** %0
+  %1 = bitcast i8** %vtable2 to i8*
+  %p2 = call i1 @llvm.type.test(i8* %1, metadata !4)
+  call void @llvm.assume(i1 %p2)
+
+  %2 = bitcast i8** %vtable2 to i32 (%struct.D*, i32)**
+  %fptr33 = load i32 (%struct.D*, i32)*, i32 (%struct.D*, i32)** %2, align 8
+
+  ; Check that the call was not devirtualized.
+  ; CHECK-IR: %call4 = tail call i32 %fptr33
+  %call4 = tail call i32 %fptr33(%struct.D* nonnull %obj2, i32 0)
+  ret i32 %call4
+}
+; CHECK-IR-LABEL: ret i32
+; CHECK-IR-LABEL: }
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+declare i32 @_ZN1D1mEi(%struct.D* %this, i32 %a)
+
+!3 = !{i64 16, !4}
+!4 = distinct !{}
Index: tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
===================================================================
--- tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -316,6 +316,7 @@
       STRINGIFY_CODE(FS, PERMODULE_PROFILE)
       STRINGIFY_CODE(FS, PERMODULE_RELBF)
       STRINGIFY_CODE(FS, PERMODULE_GLOBALVAR_INIT_REFS)
+      STRINGIFY_CODE(FS, PERMODULE_VTABLE_GLOBALVAR_INIT_REFS)
       STRINGIFY_CODE(FS, COMBINED)
       STRINGIFY_CODE(FS, COMBINED_PROFILE)
       STRINGIFY_CODE(FS, COMBINED_GLOBALVAR_INIT_REFS)
@@ -333,6 +334,7 @@
       STRINGIFY_CODE(FS, CFI_FUNCTION_DEFS)
       STRINGIFY_CODE(FS, CFI_FUNCTION_DECLS)
       STRINGIFY_CODE(FS, TYPE_ID)
+      STRINGIFY_CODE(FS, TYPE_ID_METADATA)
     }
   case bitc::METADATA_ATTACHMENT_ID:
     switch(CodeID) {
Index: tools/llvm-lto2/llvm-lto2.cpp
===================================================================
--- tools/llvm-lto2/llvm-lto2.cpp
+++ tools/llvm-lto2/llvm-lto2.cpp
@@ -266,6 +266,14 @@
   std::vector Res;
   for (const InputFile::Symbol &Sym : Input->symbols()) {
     auto I = CommandLineResolutions.find({F, Sym.getName()});
+    // If it isn't found, look for "$", which would have been added
+    // (followed by a hash) when the symbol was promoted during module
+    // splitting if it was defined in one part and used in the other.
+    // Try looking up the symbol name before the "$".
+    if (I == CommandLineResolutions.end()) {
+      auto SplitName = Sym.getName().rsplit("$");
+      I = CommandLineResolutions.find({F, SplitName.first});
+    }
     if (I == CommandLineResolutions.end()) {
       llvm::errs() << argv[0] << ": missing symbol resolution for " << F << ',' << Sym.getName() << '\n';