Index: llvm/lib/Bitcode/Reader/MetadataLoader.cpp =================================================================== --- llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -14,10 +14,12 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" @@ -86,12 +88,23 @@ using namespace llvm; +#define DEBUG_TYPE "bitcode-reader" + +STATISTIC(NumMDStringLoaded, "Number of MDStrings loaded"); +STATISTIC(NumMDNodeTemporary, "Number of MDNode::Temporary created"); +STATISTIC(NumMDRecordLoaded, "Number of Metadata records loaded"); + /// Flag whether we need to import full type definitions for ThinLTO. /// Currently needed for Darwin and LLDB. static cl::opt ImportFullTypeDefinitions( "import-full-type-definitions", cl::init(false), cl::Hidden, cl::desc("Import full type definitions for ThinLTO.")); +static cl::opt DisableLazyLoading( + "disable-ondemand-mds-loading", cl::init(false), cl::Hidden, + cl::desc("Force disable the lazy-loading on-demand of metadata when " + "loading bitcode for importing.")); + namespace { static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; } @@ -165,6 +178,10 @@ void assignValue(Metadata *MD, unsigned Idx); void tryToResolveCycles(); bool hasFwdRefs() const { return !ForwardReference.empty(); } + int getNextFwdRef() { + assert(hasFwdRefs()); + return *ForwardReference.begin(); + } /// Upgrade a type that had an MDString reference. void addTypeRef(MDString &UUID, DICompositeType &CT); @@ -215,6 +232,7 @@ ForwardReference.insert(Idx); // Create and return a placeholder, which will later be RAUW'd. + ++NumMDNodeTemporary; Metadata *MD = MDNode::getTemporary(Context, None).release(); MetadataPtrs[Idx].reset(MD); return MD; @@ -340,8 +358,26 @@ std::deque PHs; public: + bool empty() { return PHs.empty(); } DistinctMDOperandPlaceholder &getPlaceholderOp(unsigned ID); void flush(BitcodeReaderMetadataList &MetadataList); + + /// Return the list of temporaries nodes in the queue, these need to be + /// loaded before we can flush the queue. + void getTemporaries(BitcodeReaderMetadataList &MetadataList, + DenseSet &Temporaries) { + for (auto &PH : PHs) { + auto ID = PH.getID(); + auto *MD = MetadataList.lookup(ID); + if (!MD) { + Temporaries.insert(ID); + continue; + } + auto *N = dyn_cast_or_null(MD); + if (N && N->isTemporary()) + Temporaries.insert(ID); + } + } }; } // end anonymous namespace @@ -375,6 +411,33 @@ Module &TheModule; std::function getTypeByID; + /// Cursor associated to the lazy-loading of Metadatas. This is the easy way + /// to keep around the right "context" (Abbrev list) to be able to jump in + /// the middle of the metadatas block and load any record. + BitstreamCursor IndexCursor; + + /// Index that keeps track of MDString values. + std::vector MDStringRef; + + /// Populate the index above to enable lazily loading of MDString. + Error indexMetadataStrings(ArrayRef Record, StringRef Blob); + + /// On-demand loading of a single MDString. Requires the index above to be + /// populated. + MDString *lazyLoadOneMDString(unsigned Idx); + + /// Index that keeps track of where to find a metadata record in the stream. + std::vector GlobalMetadatasBitPosIndex; + + /// Populate the index above to enable lazily loading of metadatas, and load + /// the named metadatas as well as the transitively referenced global + /// Metadatas. + Expected lazyLoadModuleMetadataBlock(PlaceholderQueue &Placeholders); + + /// On-demand loading of a single metadata. Requires the index above to be + /// populated. + void lazyLoadOneMetadata(unsigned Idx, PlaceholderQueue &Placeholders); + // Keep mapping of seens pair of old-style CU <-> SP, and update pointers to // point from SP to CU after a block is completly parsed. std::vector> CUSubprograms; @@ -401,6 +464,20 @@ ArrayRef Record); Error parseMetadataKindRecord(SmallVectorImpl &Record); + /// Ensure that all forward-references and placeholders are resolved. + /// Iteratively lazy-loading metadatas on-demand if needed. + void flush(PlaceholderQueue &Placeholders); + + // Upgrade old-style CU <-> SP pointers to point from SP to CU. + void upgradeCUSubprograms() { + for (auto CU_SP : CUSubprograms) + if (auto *SPs = dyn_cast_or_null(CU_SP.second)) + for (auto &Op : SPs->operands()) + if (auto *SP = dyn_cast_or_null(Op)) + SP->replaceOperandWith(7, CU_SP.first); + CUSubprograms.clear(); + } + public: MetadataLoaderImpl(BitstreamCursor &Stream, Module &TheModule, BitcodeReaderValueList &ValueList, @@ -444,20 +521,211 @@ Message, make_error_code(BitcodeError::CorruptedBitcode)); } +Expected MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock( + PlaceholderQueue &Placeholders) { + IndexCursor = Stream; + SmallVector Record; + // Get the abbrevs, and preload record positions to make them lazy-loadable. + while (true) { + BitstreamEntry Entry = IndexCursor.advanceSkippingSubblocks( + BitstreamCursor::AF_DontPopBlockAtEnd); + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: { + return true; + } + case BitstreamEntry::Record: { + // The interesting case. + uint64_t CurrentPos = IndexCursor.GetCurrentBitNo(); + auto Code = IndexCursor.skipRecord(Entry.ID); + switch (Code) { + case bitc::METADATA_STRINGS: { + // Rewind and parse the strings. + IndexCursor.JumpToBit(CurrentPos); + StringRef Blob; + ++NumMDRecordLoaded; + Record.clear(); + IndexCursor.readRecord(Entry.ID, Record, &Blob); + if (auto Err = indexMetadataStrings(Record, Blob)) + return std::move(Err); + break; + } + case bitc::METADATA_INDEX_OFFSET: { + // This is the offset to the index, when we see this we skip all the + // records and load only an index to these. + IndexCursor.JumpToBit(CurrentPos); + ++NumMDRecordLoaded; + Record.clear(); + IndexCursor.readRecord(Entry.ID, Record); + if (Record.size() != 2) + return error("Invalid record"); + auto Offset = Record[0] + (Record[1] << 32); + auto BeginPos = IndexCursor.GetCurrentBitNo(); + IndexCursor.JumpToBit(BeginPos + Offset); + Entry = IndexCursor.advanceSkippingSubblocks( + BitstreamCursor::AF_DontPopBlockAtEnd); + assert(Entry.Kind == BitstreamEntry::Record && + "Corrupted bitcode: Expected `Record` when trying to find the " + "Metadata index"); + Record.clear(); + auto Code = IndexCursor.readRecord(Entry.ID, Record); + (void)Code; + assert(Code == bitc::METADATA_INDEX && "Corrupted bitcode: Expected " + "`METADATA_INDEX` when trying " + "to find the Metadata index"); + + // Delta unpack + auto CurrentValue = BeginPos; + GlobalMetadatasBitPosIndex.reserve(Record.size()); + for (auto &Elt : Record) { + CurrentValue += Elt; + GlobalMetadatasBitPosIndex.push_back(CurrentValue); + } + break; + } + case bitc::METADATA_INDEX: + // We don't expect to get there, the Index is loaded when we encounter + // the offset. + return error("Corrupted Metadata block"); + case bitc::METADATA_NAME: { + // Named metadata need to be materialized now and aren't deferred. + IndexCursor.JumpToBit(CurrentPos); + ++NumMDRecordLoaded; + Record.clear(); + unsigned Code = IndexCursor.readRecord(Entry.ID, Record); + assert(Code == bitc::METADATA_NAME); + + // Read name of the named metadata. + SmallString<8> Name(Record.begin(), Record.end()); + Code = IndexCursor.ReadCode(); + + // Named Metadata comes in two parts, we expect the name to be followed + // by the node + Record.clear(); + unsigned NextBitCode = IndexCursor.readRecord(Code, Record); + assert(NextBitCode == bitc::METADATA_NAMED_NODE); + (void)NextBitCode; + + // Read named metadata elements. + unsigned Size = Record.size(); + NamedMDNode *NMD = TheModule.getOrInsertNamedMetadata(Name); + for (unsigned i = 0; i != Size; ++i) { + // FIXME: could we use a placeholder here? + MDNode *MD = MetadataList.getMDNodeFwdRefOrNull(Record[i]); + assert(MD && "Invalid record"); + NMD->addOperand(MD); + } + break; + } + case bitc::METADATA_GLOBAL_DECL_ATTACHMENT: { + // FIXME: we need to do this early because we don't materialize global + // value expliticitly. + IndexCursor.JumpToBit(CurrentPos); + Record.clear(); + IndexCursor.readRecord(Entry.ID, Record); + if (Record.size() % 2 == 0) + return error("Invalid record"); + unsigned ValueID = Record[0]; + if (ValueID >= ValueList.size()) + return error("Invalid record"); + if (auto *GO = dyn_cast(ValueList[ValueID])) + if (Error Err = parseGlobalObjectAttachment( + *GO, ArrayRef(Record).slice(1))) + return std::move(Err); + break; + } + case bitc::METADATA_KIND: + case bitc::METADATA_STRING_OLD: + case bitc::METADATA_OLD_FN_NODE: + case bitc::METADATA_OLD_NODE: + case bitc::METADATA_VALUE: + case bitc::METADATA_DISTINCT_NODE: + case bitc::METADATA_NODE: + case bitc::METADATA_LOCATION: + case bitc::METADATA_GENERIC_DEBUG: + case bitc::METADATA_SUBRANGE: + case bitc::METADATA_ENUMERATOR: + case bitc::METADATA_BASIC_TYPE: + case bitc::METADATA_DERIVED_TYPE: + case bitc::METADATA_COMPOSITE_TYPE: + case bitc::METADATA_SUBROUTINE_TYPE: + case bitc::METADATA_MODULE: + case bitc::METADATA_FILE: + case bitc::METADATA_COMPILE_UNIT: + case bitc::METADATA_SUBPROGRAM: + case bitc::METADATA_LEXICAL_BLOCK: + case bitc::METADATA_LEXICAL_BLOCK_FILE: + case bitc::METADATA_NAMESPACE: + case bitc::METADATA_MACRO: + case bitc::METADATA_MACRO_FILE: + case bitc::METADATA_TEMPLATE_TYPE: + case bitc::METADATA_TEMPLATE_VALUE: + case bitc::METADATA_GLOBAL_VAR: + case bitc::METADATA_LOCAL_VAR: + case bitc::METADATA_EXPRESSION: + case bitc::METADATA_OBJC_PROPERTY: + case bitc::METADATA_IMPORTED_ENTITY: + case bitc::METADATA_GLOBAL_VAR_EXPR: + // We don't expect to see any of these, if we see one, give on + // lazy-loading and fallback. + MDStringRef.clear(); + GlobalMetadatasBitPosIndex.clear(); + return false; + } + break; + } + } + } +} + /// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing /// module level metadata. Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) { if (!ModuleLevel && MetadataList.hasFwdRefs()) return error("Invalid metadata: fwd refs into function blocks"); + // Record the entry position so that we can jump back here and efficiently + // skip the whole block in case we lazy-load. + auto EntryPos = Stream.GetCurrentBitNo(); + if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID)) return error("Invalid record"); - unsigned NextMetadataNo = MetadataList.size(); SmallVector Record; - PlaceholderQueue Placeholders; + // We lazy-load module-level metadata: we build an index for each record, and + // then load individual record as needed, starting with the named metadatas. + if (ModuleLevel && IsImporting && MetadataList.empty() && + !DisableLazyLoading) { + auto SuccessOrErr = lazyLoadModuleMetadataBlock(Placeholders); + if (!SuccessOrErr) + return SuccessOrErr.takeError(); + if (SuccessOrErr.get()) { + // An index was successfully created and we will be able to load metadata + // on-demand. + MetadataList.resize(MDStringRef.size() + + GlobalMetadatasBitPosIndex.size()); + + // Reading the named metadata created forward references and/or + // placeholders, that we flush here. + flush(Placeholders); + upgradeCUSubprograms(); + // Return at the beginning of the block, since it is easy to skip it + // entirely from there. + Stream.ReadBlockEnd(); // Pop the abbrev block context. + Stream.JumpToBit(EntryPos); + if (Stream.SkipBlock()) + return error("Invalid record"); + return Error::success(); + } + // Couldn't load an index, fallback to loading all the block "old-style". + } + + unsigned NextMetadataNo = MetadataList.size(); + // Read all the records. while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); @@ -467,16 +735,8 @@ case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: - // Upgrade old-style CU <-> SP pointers to point from SP to CU. - for (auto CU_SP : CUSubprograms) - if (auto *SPs = dyn_cast_or_null(CU_SP.second)) - for (auto &Op : SPs->operands()) - if (auto *SP = dyn_cast_or_null(Op)) - SP->replaceOperandWith(7, CU_SP.first); - CUSubprograms.clear(); - - MetadataList.tryToResolveCycles(); - Placeholders.flush(MetadataList); + flush(Placeholders); + upgradeCUSubprograms(); return Error::success(); case BitstreamEntry::Record: // The interesting case. @@ -486,6 +746,7 @@ // Read a record. Record.clear(); StringRef Blob; + ++NumMDRecordLoaded; unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob); if (Error Err = parseOneMetadata(Record, Code, Placeholders, Blob, ModuleLevel, NextMetadataNo)) @@ -493,6 +754,67 @@ } } +MDString *MetadataLoader::MetadataLoaderImpl::lazyLoadOneMDString(unsigned ID) { + ++NumMDStringLoaded; + if (Metadata *MD = MetadataList.lookup(ID)) + return cast(MD); + auto MDS = MDString::get(Context, MDStringRef[ID]); + MetadataList.assignValue(MDS, ID); + return MDS; +} + +void MetadataLoader::MetadataLoaderImpl::lazyLoadOneMetadata( + unsigned ID, PlaceholderQueue &Placeholders) { + assert(ID < (MDStringRef.size()) + GlobalMetadatasBitPosIndex.size()); + if (auto *MD = MetadataList.lookup(ID)) { + auto *N = dyn_cast_or_null(MD); + if (!N || !N->isTemporary()) + return; + } + assert(ID >= MDStringRef.size() && "Unexpected lazy-loading of MDString"); + + SmallVector Record; + StringRef Blob; + IndexCursor.JumpToBit(GlobalMetadatasBitPosIndex[ID - MDStringRef.size()]); + auto Entry = IndexCursor.advanceSkippingSubblocks(); + ++NumMDRecordLoaded; + unsigned Code = IndexCursor.readRecord(Entry.ID, Record, &Blob); + if (Error Err = parseOneMetadata(Record, Code, Placeholders, Blob, false, ID)) + report_fatal_error("Can't lazyload MD"); +} + +// Flush forward references +void MetadataLoader::MetadataLoaderImpl::flush(PlaceholderQueue &Placeholders) { + DenseSet Temporaries; + while (1) { + // Populate Temporaries with the placeholders that haven't been loaded yet. + Placeholders.getTemporaries(MetadataList, Temporaries); + + // If we don't have any temporary, or FwdReference, we're done! + if (Temporaries.empty() && !MetadataList.hasFwdRefs()) + break; + + // First, load all the temporaries. This can add new placeholders or + // forward references. + for (auto ID : Temporaries) + lazyLoadOneMetadata(ID, Placeholders); + Temporaries.clear(); + + // Second, load the forward-references. This can also add new placeholders + // or forward references. + while (MetadataList.hasFwdRefs()) + lazyLoadOneMetadata(MetadataList.getNextFwdRef(), Placeholders); + } + // At this point we don't have any forward reference remaining, or temporary + // that haven't been loaded. We can safely drop RAUW support and mark cycles + // as resolved. + MetadataList.tryToResolveCycles(); + + // Finally, everything is in place, we can replace the placeholders operands + // with the final node they refer to. + Placeholders.flush(MetadataList); +} + Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata( SmallVectorImpl &Record, unsigned Code, PlaceholderQueue &Placeholders, StringRef Blob, bool ModuleLevel, @@ -500,6 +822,8 @@ bool IsDistinct = false; auto getMD = [&](unsigned ID) -> Metadata * { + if (ID < MDStringRef.size()) + return lazyLoadOneMDString(ID); if (!IsDistinct) return MetadataList.getMetadataFwdRef(ID); if (auto *MD = MetadataList.getMetadataIfResolved(ID)) @@ -519,7 +843,8 @@ auto getMDString = [&](unsigned ID) -> MDString * { // This requires that the ID is not really a forward reference. In // particular, the MDString must already have been resolved. - return cast_or_null(getMDOrNull(ID)); + auto MDS = getMDOrNull(ID); + return cast_or_null(MDS); }; // Support for old type refs. @@ -539,6 +864,7 @@ Record.clear(); Code = Stream.ReadCode(); + ++NumMDRecordLoaded; unsigned NextBitCode = Stream.readRecord(Code, Record); if (NextBitCode != bitc::METADATA_NAMED_NODE) return error("METADATA_NAME not followed by METADATA_NAMED_NODE"); @@ -1137,12 +1463,14 @@ // Test for upgrading !llvm.loop. HasSeenOldLoopTags |= mayBeOldLoopAttachmentTag(String); - + ++NumMDStringLoaded; Metadata *MD = MDString::get(Context, String); MetadataList.assignValue(MD, NextMetadataNo++); break; } case bitc::METADATA_STRINGS: + if (ModuleLevel && !MDStringRef.empty()) + return error("Invalid loading of MDStrings record with indexing enabled"); if (Error Err = parseMetadataStrings(Record, Blob, NextMetadataNo)) return Err; break; @@ -1166,7 +1494,43 @@ break; } } + return Error::success(); #undef GET_OR_DISTINCT +} + +Error MetadataLoader::MetadataLoaderImpl::indexMetadataStrings( + ArrayRef Record, StringRef Blob) { + // All the MDStrings in the block are emitted together in a single + // record. The strings are concatenated and stored in a blob along with + // their sizes. + if (Record.size() != 2) + return error("Invalid record: metadata strings layout"); + + unsigned NumStrings = Record[0]; + unsigned StringsOffset = Record[1]; + if (!NumStrings) + return error("Invalid record: metadata strings with no strings"); + if (StringsOffset > Blob.size()) + return error("Invalid record: metadata strings corrupt offset"); + + MDStringRef.reserve(NumStrings); + StringRef Lengths = Blob.slice(0, StringsOffset); + SimpleBitstreamCursor R(Lengths); + + StringRef Strings = Blob.drop_front(StringsOffset); + do { + if (R.AtEndOfStream()) + return error("Invalid record: metadata strings bad length"); + + unsigned Size = R.ReadVBR(6); + if (Strings.size() < Size) + return error("Invalid record: metadata strings truncated chars"); + + MDStringRef.push_back(Strings.slice(0, Size)); + + Strings = Strings.drop_front(Size); + } while (--NumStrings); + return Error::success(); } @@ -1197,6 +1561,7 @@ if (Strings.size() < Size) return error("Invalid record: metadata strings truncated chars"); + ++NumMDStringLoaded; MetadataList.assignValue(MDString::get(Context, Strings.slice(0, Size)), NextMetadataNo++); Strings = Strings.drop_front(Size); @@ -1228,6 +1593,8 @@ SmallVector Record; + PlaceholderQueue Placeholders; + while (true) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); @@ -1236,6 +1603,7 @@ case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: + flush(Placeholders); return Error::success(); case BitstreamEntry::Record: // The interesting case. @@ -1244,6 +1612,7 @@ // Read a metadata attachment record. Record.clear(); + ++NumMDRecordLoaded; switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore. break; @@ -1268,7 +1637,14 @@ if (I->second == LLVMContext::MD_tbaa && StripTBAA) continue; - Metadata *Node = MetadataList.getMetadataFwdRef(Record[i + 1]); + auto Idx = Record[i + 1]; + if (Idx < (MDStringRef.size() + GlobalMetadatasBitPosIndex.size()) && + !MetadataList.lookup(Idx)) + // Load the attachment if it is in the lazy-loadable range and hasn't + // been loaded yet. + lazyLoadOneMetadata(Idx, Placeholders); + + Metadata *Node = MetadataList.getMetadataFwdRef(Idx); if (isa(Node)) // Drop the attachment. This used to be legal, but there's no // upgrade path. @@ -1331,6 +1707,7 @@ // Read a record. Record.clear(); + ++NumMDRecordLoaded; unsigned Code = Stream.readRecord(Entry.ID, Record); switch (Code) { default: // Default behavior: ignore. Index: llvm/lib/Transforms/IPO/FunctionImport.cpp =================================================================== --- llvm/lib/Transforms/IPO/FunctionImport.cpp +++ llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -625,7 +625,6 @@ // now, before linking it (otherwise this will be a noop). if (Error Err = SrcModule->materializeMetadata()) return std::move(Err); - UpgradeDebugInfo(*SrcModule); auto &ImportGUIDs = FunctionsToImportPerModule->second; // Find the globals to import @@ -698,6 +697,10 @@ } } + // Upgrade debug info after we're done materialize all the globals and we + // have loaded all the metadatas! + UpgradeDebugInfo(*SrcModule); + // Link in the specified functions. if (renameModuleForThinLTO(*SrcModule, Index, &GlobalsToImport)) return true;