diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp --- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp +++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp @@ -438,6 +438,20 @@ /// Index that keeps track of where to find a metadata record in the stream. std::vector GlobalMetadataBitPosIndex; + /// Cursor position of the start of the global decl attachments, to enable + /// loading using the index built for lazy loading, instead of forward + /// references. + uint64_t GlobalDeclAttachmentPos = 0; + +#ifndef NDEBUG + /// Sanity check that we end up parsing all of the global decl attachments. + unsigned NumGlobalDeclAttachSkipped = 0; + unsigned NumGlobalDeclAttachParsed = 0; +#endif + + /// Load the global decl attachments, using the index built for lazy loading. + Expected loadGlobalDeclAttachments(); + /// Populate the index above to enable lazily loading of metadata, and load /// the named metadata as well as the transitively referenced global /// Metadata. @@ -681,8 +695,10 @@ MetadataLoader::MetadataLoaderImpl::lazyLoadModuleMetadataBlock() { IndexCursor = Stream; SmallVector Record; + GlobalDeclAttachmentPos = 0; // Get the abbrevs, and preload record positions to make them lazy-loadable. while (true) { + uint64_t SavedPos = IndexCursor.GetCurrentBitNo(); Expected MaybeEntry = IndexCursor.advanceSkippingSubblocks( BitstreamCursor::AF_DontPopBlockAtEnd); if (!MaybeEntry) @@ -817,25 +833,11 @@ break; } case bitc::METADATA_GLOBAL_DECL_ATTACHMENT: { - // FIXME: we need to do this early because we don't materialize global - // value explicitly. - if (Error Err = IndexCursor.JumpToBit(CurrentPos)) - return std::move(Err); - Record.clear(); - if (Expected MaybeRecord = - IndexCursor.readRecord(Entry.ID, Record)) - ; - else - return MaybeRecord.takeError(); - if (Record.size() % 2 == 0) - return error("Invalid record"); - unsigned ValueID = Record[0]; - if (ValueID >= ValueList.size()) - return error("Invalid record"); - if (auto *GO = dyn_cast(ValueList[ValueID])) - if (Error Err = parseGlobalObjectAttachment( - *GO, ArrayRef(Record).slice(1))) - return std::move(Err); + if (!GlobalDeclAttachmentPos) + GlobalDeclAttachmentPos = SavedPos; +#ifndef NDEBUG + NumGlobalDeclAttachSkipped++; +#endif break; } case bitc::METADATA_KIND: @@ -885,6 +887,83 @@ } } +// Load the global decl attachments after building the lazy loading index. +// We don't load them "lazily" - all global decl attachments must be +// parsed since they aren't materialized on demand. However, by delaying +// their parsing until after the index is created, we can use the index +// instead of creating temporaries. +Expected MetadataLoader::MetadataLoaderImpl::loadGlobalDeclAttachments() { + // Nothing to do if we didn't find any of these metadata records. + if (!GlobalDeclAttachmentPos) + return true; + // Use a temporary cursor so that we don't mess up the main Stream cursor or + // the lazy loading IndexCursor (which holds the necessary abbrev ids). + BitstreamCursor TempCursor = Stream; + SmallVector Record; + // Jump to the position before the first global decl attachment, so we can + // scan for the first BitstreamEntry record. + if (Error Err = TempCursor.JumpToBit(GlobalDeclAttachmentPos)) + return std::move(Err); + while (true) { + Expected MaybeEntry = TempCursor.advanceSkippingSubblocks( + BitstreamCursor::AF_DontPopBlockAtEnd); + if (!MaybeEntry) + return MaybeEntry.takeError(); + BitstreamEntry Entry = MaybeEntry.get(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + // Sanity check that we parsed them all. + assert(NumGlobalDeclAttachSkipped == NumGlobalDeclAttachParsed); + return true; + case BitstreamEntry::Record: + break; + } + uint64_t CurrentPos = TempCursor.GetCurrentBitNo(); + Expected MaybeCode = TempCursor.skipRecord(Entry.ID); + if (!MaybeCode) + return MaybeCode.takeError(); + if (MaybeCode.get() != bitc::METADATA_GLOBAL_DECL_ATTACHMENT) { + // Anything other than a global decl attachment signals the end of + // these records. sanity check that we parsed them all. + assert(NumGlobalDeclAttachSkipped == NumGlobalDeclAttachParsed); + return true; + } +#ifndef NDEBUG + NumGlobalDeclAttachParsed++; +#endif + // FIXME: we need to do this early because we don't materialize global + // value explicitly. + if (Error Err = TempCursor.JumpToBit(CurrentPos)) + return std::move(Err); + Record.clear(); + if (Expected MaybeRecord = + TempCursor.readRecord(Entry.ID, Record)) + ; + else + return MaybeRecord.takeError(); + if (Record.size() % 2 == 0) + return error("Invalid record"); + unsigned ValueID = Record[0]; + if (ValueID >= ValueList.size()) + return error("Invalid record"); + if (auto *GO = dyn_cast(ValueList[ValueID])) { + // Need to save and restore the current position since + // parseGlobalObjectAttachment will resolve all forward references which + // would require parsing from locations stored in the index. + CurrentPos = TempCursor.GetCurrentBitNo(); + if (Error Err = parseGlobalObjectAttachment( + *GO, ArrayRef(Record).slice(1))) + return std::move(Err); + if (Error Err = TempCursor.JumpToBit(CurrentPos)) + return std::move(Err); + } + } +} + /// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing /// module level metadata. Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) { @@ -914,6 +993,14 @@ MetadataList.resize(MDStringRef.size() + GlobalMetadataBitPosIndex.size()); + // Now that we have built the index, load the global decl attachments + // that were deferred during that process. This avoids creating + // temporaries. + SuccessOrErr = loadGlobalDeclAttachments(); + if (!SuccessOrErr) + return SuccessOrErr.takeError(); + assert(SuccessOrErr.get()); + // Reading the named metadata created forward references and/or // placeholders, that we flush here. resolveForwardRefsAndPlaceholders(Placeholders); @@ -2021,7 +2108,8 @@ auto K = MDKindMap.find(Record[I]); if (K == MDKindMap.end()) return error("Invalid ID"); - MDNode *MD = MetadataList.getMDNodeFwdRefOrNull(Record[I + 1]); + MDNode *MD = + dyn_cast_or_null(getMetadataFwdRefOrLoad(Record[I + 1])); if (!MD) return error("Invalid metadata attachment: expect fwd ref to MDNode"); GO.addMetadata(K->second, *MD); diff --git a/llvm/test/ThinLTO/X86/Inputs/devirt2.ll b/llvm/test/ThinLTO/X86/Inputs/devirt2.ll --- a/llvm/test/ThinLTO/X86/Inputs/devirt2.ll +++ b/llvm/test/ThinLTO/X86/Inputs/devirt2.ll @@ -13,28 +13,28 @@ @_ZTV1E = constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* undef, i8* bitcast (i32 (%struct.E*, i32)* @_ZN1E1mEi to i8*)] }, !type !4 define i32 @_ZN1B1fEi(%struct.B* %this, i32 %a) #0 { - ret i32 0; + ret i32 0 } define internal i32 @_ZN1A1nEi(%struct.A* %this, i32 %a) #0 { - ret i32 0; + ret i32 0 } define i32 @_ZN1C1fEi(%struct.C* %this, i32 %a) #0 { - ret i32 0; + ret i32 0 } define linkonce_odr i32 @_ZN1D1mEi(%struct.D* %this, i32 %a) #0 { - ret i32 0; + ret i32 0 } define internal i32 @_ZN1E1mEi(%struct.E* %this, i32 %a) #0 { - ret i32 0; + ret i32 0, !dbg !12 } define i32 @test2(%struct.E* %obj, i32 %a) { entry: - %0 = bitcast %struct.E* %obj to i8*** + %0 = bitcast %struct.E* %obj to i8***, !dbg !12 %vtable2 = load i8**, i8*** %0 %1 = bitcast i8** %vtable2 to i8* %p2 = call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1E") @@ -52,8 +52,24 @@ declare i1 @llvm.type.test(i8*, metadata) declare void @llvm.assume(i1) +!llvm.dbg.cu = !{!5} +!llvm.module.flags = !{!13, !14, !15} + !0 = !{i64 16, !"_ZTS1A"} !1 = !{i64 16, !"_ZTS1B"} !2 = !{i64 16, !"_ZTS1C"} !3 = !{i64 16, !"_ZTS1D"} !4 = !{i64 16, !"_ZTS1E"} +!5 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !6, producer: "clang version 12.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !7, splitDebugInlining: false, nameTableKind: None) +!6 = !DIFile(filename: "test.cc", directory: "/tmp") +!7 = !{} +!8 = distinct !DISubprogram(name: "bar", linkageName: "_Z5barv", scope: !6, file: !6, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !5, retainedNodes: !7) +!9 = !DISubroutineType(types: !10) +!10 = !{!11} +!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!12 = !DILocation(line: 2, column: 3, scope: !8, inlinedAt: !16) +!13 = !{i32 7, !"Dwarf Version", i32 4} +!14 = !{i32 2, !"Debug Info Version", i32 3} +!15 = !{i32 1, !"wchar_size", i32 4} +!16 = !DILocation(line: 1, column: 1, scope: !17) +!17 = distinct !DISubprogram(name: "foo", linkageName: "_Z5foov", scope: !6, file: !6, line: 1, type: !9, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !5, retainedNodes: !7) diff --git a/llvm/test/ThinLTO/X86/devirt2.ll b/llvm/test/ThinLTO/X86/devirt2.ll --- a/llvm/test/ThinLTO/X86/devirt2.ll +++ b/llvm/test/ThinLTO/X86/devirt2.ll @@ -15,8 +15,10 @@ ; ENABLESPLITFLAG: !{i32 1, !"EnableSplitLTOUnit", i32 1} ; Generate unsplit module with summary for ThinLTO index-based WPD. -; RUN: opt -thinlto-bc -o %t3.o %s -; RUN: opt -thinlto-bc -o %t4.o %p/Inputs/devirt2.ll +; Force generation of the bitcode index so that we also test lazy metadata +; loader handling of the type metadata. +; RUN: opt -bitcode-mdindex-threshold=0 -thinlto-bc -o %t3.o %s +; RUN: opt -bitcode-mdindex-threshold=0 -thinlto-bc -o %t4.o %p/Inputs/devirt2.ll ; Check that we don't have module flag when splitting not enabled for ThinLTO, ; and that we generate summary information needed for index-based WPD.