diff --git a/llvm/include/llvm/Bitcode/BitcodeReader.h b/llvm/include/llvm/Bitcode/BitcodeReader.h --- a/llvm/include/llvm/Bitcode/BitcodeReader.h +++ b/llvm/include/llvm/Bitcode/BitcodeReader.h @@ -54,6 +54,8 @@ bool IsThinLTO; bool HasSummary; bool EnableSplitLTOUnit; + bool OnlyHasThinLTOIndex; // Bitcode only contains ThinLTO index (i.e., + // without function IR, etc) }; /// Represents a module in a bitcode file. diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -117,6 +117,8 @@ // IFUNC: [ifunc value type, addrspace, resolver val#, linkage, visibility] MODULE_CODE_IFUNC = 18, + + MODULE_CODE_THINLTO_INDEX_FLAG = 19, // ThinLTO index related flag: [uint64_t] }; /// PARAMATTR blocks have code for defining a parameter attribute set. diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp @@ -140,6 +140,7 @@ STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED) STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME) STRINGIFY_CODE(MODULE_CODE, HASH) + STRINGIFY_CODE(MODULE_CODE, THINLTO_INDEX_FLAG) } case bitc::IDENTIFICATION_BLOCK_ID: switch (CodeID) { diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -3776,7 +3776,8 @@ if (!MaybeBitCode) return MaybeBitCode.takeError(); switch (unsigned BitCode = MaybeBitCode.get()) { - default: break; // Default behavior, ignore unknown content. + default: + break; // Default behavior, ignore unknown or irrelevant content. case bitc::MODULE_CODE_VERSION: { Expected VersionOrErr = parseVersionRecord(Record); if (!VersionOrErr) @@ -5960,7 +5961,7 @@ return MaybeBitCode.takeError(); switch (MaybeBitCode.get()) { default: - break; // Default behavior, ignore unknown content. + break; // Default behavior, ignore unknown or irrelevant content. case bitc::MODULE_CODE_VERSION: { if (Error Err = parseVersionRecord(Record).takeError()) return Err; @@ -6987,7 +6988,47 @@ if (Error Err = Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return std::move(Err); + // Initialize LTOInfo. + struct BitcodeLTOInfo LTOInfo = { + .IsThinLTO = false, + .HasSummary = false, + .EnableSplitLTOUnit = false, + .OnlyHasThinLTOIndex = false, + }; + + // We want to read two types of data from module; + // 1. The record with code `MODULE_CODE_THINLTO_INDEX_FLAG` in the module + // block. + // 2. If present, subblocks related with ThinLTO (block id + // `GLOBALVAL_SUMMARY_BLOCK_ID`). + + // To bail out early after both types of data are read, or skip records if + // `MODULE_CODE_THINLTO_INDEX_FLAG` are read already, use ReadState to + // maintain which states are read. + // + // Valid states: + // - 0 : Neither is read. + // - ThinLTOIndexFlagRead : The record with code + // `MODULE_CODE_THINLTO_INDEX_FLAG` is read; the rest of records could be read + // in a skipping way. + // - ThinLTOSubblockRead : The subblocks related with ThinLTO is read. + // - BothRead : Both types of data are found. Bail out and return + // BitcodeLTOInfo. + // + // The fact that real-world writers write record of ID + // MODULE_CODE_THINLTO_INDEX_FLAG before subblocks assists optimization, but + // correctness doesn't depend on the order. + static const uint8_t ThinLTOIndexFlagRead = 1; + static const uint8_t ThinLTOSubblockRead = 2; + static const uint8_t BothRead = (ThinLTOIndexFlagRead | ThinLTOSubblockRead); + + uint8_t ReadState = 0; + + uint64_t ThinLTOIndexFlag = 0; + + SmallVector Record; while (true) { + llvm::BitstreamEntry Entry; if (Error E = Stream.advance().moveInto(Entry)) return std::move(E); @@ -6996,17 +7037,30 @@ case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: - return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/false, - /*EnableSplitLTOUnit=*/false}; + return LTOInfo; - case BitstreamEntry::SubBlock: + case BitstreamEntry::SubBlock: { if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID) { Expected EnableSplitLTOUnit = getEnableSplitLTOUnitFlag(Stream, Entry.ID); if (!EnableSplitLTOUnit) return EnableSplitLTOUnit.takeError(); - return BitcodeLTOInfo{/*IsThinLTO=*/true, /*HasSummary=*/true, - *EnableSplitLTOUnit}; + + LTOInfo.IsThinLTO = true; + LTOInfo.HasSummary = true; + LTOInfo.EnableSplitLTOUnit = *EnableSplitLTOUnit; + + ReadState |= ThinLTOSubblockRead; + if (ReadState & ThinLTOIndexFlagRead) { + LTOInfo.OnlyHasThinLTOIndex = (ThinLTOIndexFlag & 1); + return LTOInfo; + } + + // At this point, subblock is read but THINLTO_INDEX_FLAG is not read + // yet. This should be very rare (if happens at all) since real-world + // bitcode writer writes record of `MODULE_CODE_THINLTO_INDEX_FLAG` + // before subblocks. + continue; } if (Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID) { @@ -7014,8 +7068,12 @@ getEnableSplitLTOUnitFlag(Stream, Entry.ID); if (!EnableSplitLTOUnit) return EnableSplitLTOUnit.takeError(); - return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/true, - *EnableSplitLTOUnit}; + LTOInfo.HasSummary = true; + LTOInfo.EnableSplitLTOUnit = *EnableSplitLTOUnit; + // assert((!LTOInfo.IsThinLTO) && "Expected not to be ThinLTO"); + // assert((ThinLTOIndexFlag == 0) && "Expected ThinLTOIndexFlag to be + // zero"); + return LTOInfo; } // Ignore other sub-blocks. @@ -7024,10 +7082,33 @@ continue; case BitstreamEntry::Record: - if (Expected StreamFailed = Stream.skipRecord(Entry.ID)) - continue; - else - return StreamFailed.takeError(); + // Read records in a skipping fashion if the target record is read + // already. + if (ReadState & ThinLTOIndexFlagRead) { + if (Error E = Stream.skipRecord(Entry.ID).takeError()) + return std::move(E); + } else { + // Read record iff ThinLTO Index flag is not read yet. + Record.clear(); + Expected MaybeCode = Stream.readRecord(Entry.ID, Record); + if (!MaybeCode) + return MaybeCode.takeError(); + + if (MaybeCode.get() == bitc::MODULE_CODE_THINLTO_INDEX_FLAG) { + ReadState |= ThinLTOIndexFlagRead; + + // assert(Record.size() == 1); + + ThinLTOIndexFlag = Record[0]; + } + } + break; + } + } + + if (ReadState == BothRead) { + LTOInfo.OnlyHasThinLTOIndex = (ThinLTOIndexFlag & 1); + return LTOInfo; } } } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -4395,6 +4395,8 @@ size_t BlockStartPos = Buffer.size(); writeModuleVersion(); + Stream.EmitRecord(bitc::MODULE_CODE_THINLTO_INDEX_FLAG, + ArrayRef{0}); // Emit blockinfo, which defines the standard abbreviations etc. writeBlockInfo(); @@ -4655,6 +4657,9 @@ writeModuleVersion(); + Stream.EmitRecord(bitc::MODULE_CODE_THINLTO_INDEX_FLAG, + ArrayRef{0}); + // Write the module paths in the combined index. writeModStrings(); @@ -4797,6 +4802,8 @@ Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); writeModuleVersion(); + Stream.EmitRecord(bitc::MODULE_CODE_THINLTO_INDEX_FLAG, + ArrayRef{1}); writeSimplifiedModuleInfo(); diff --git a/llvm/test/Bitcode/bcanalyzer-thinlto-index-flag.ll b/llvm/test/Bitcode/bcanalyzer-thinlto-index-flag.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Bitcode/bcanalyzer-thinlto-index-flag.ll @@ -0,0 +1,32 @@ +; Tests that record of `MODULE_CODE_THINLTO_INDEX_FLAG` in a module block could be generated correctly. +; Only last bit of THINLTO_INDEX_FLAG is used, and so desired value of THINLTO_INDEX_FLAG op0 is 0 if bitcode is not the minimized bitcode for thin link only. + +; IR to bitcode; so THINLTO_INDEX_FLAG op0 is 0. +; RUN: llvm-as < %s | llvm-bcanalyzer -dump | FileCheck %s + +; %t.o is the thin-lto ready bitcode, and op0 of THINLTO_INDEX_FLAG is 0. +; %t.thinlink.bc is the minimized bitcode, so op0 of THINLTO_INDEX_FLAG is 1. +; RUN: opt -thinlto-bc %s -thin-link-bitcode-file=%t.thinlink.bc -o %t.o +; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s --check-prefix=NO_INDEX_ONLY +; RUN: llvm-bcanalyzer -dump %t.thinlink.bc | FileCheck %s --check-prefix=INDEX_ONLY + +; Tests that `llvm-dis` could disassemble minimized bitcode, and verify its content. +; RUN: llvm-dis %t.thinlink.bc -o - | FileCheck %s --check-prefix=DIS + +; CHECK: + +; NO_INDEX_ONLY: + +; INDEX_ONLY: + +; DIS: ^0 = module: (path: "{{.*}}bcanalyzer-thinlto-index-flag.ll.tmp.thinlink.bc", hash: (2375937533, 1714591073, 2662917323, 3603576641, 172395240)) +; DIS: ^1 = gv: (name: "aplusb", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 2))) ; guid = 15717176516085531008 +; DIS: ^2 = blockcount: 1 + +source_filename = "add.cpp" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" +define i32 @aplusb(i32 %a, i32 %b) { + %add = add i32 %b, %a + ret i32 %add +} \ No newline at end of file diff --git a/llvm/tools/llvm-dis/llvm-dis.cpp b/llvm/tools/llvm-dis/llvm-dis.cpp --- a/llvm/tools/llvm-dis/llvm-dis.cpp +++ b/llvm/tools/llvm-dis/llvm-dis.cpp @@ -74,6 +74,10 @@ "then materialize only the metadata"), cl::cat(DisCategory)); +static cl::opt DumpThinLTOIndexOnly( + "dump-thinlto-index-only", + cl::desc("Only read thinlto index and dump it as a bitcode"), + cl::init(false), cl::Hidden, cl::cat(DisCategory)); namespace { static void printDebugLoc(const DebugLoc &DL, formatted_raw_ostream &OS) { @@ -186,14 +190,19 @@ for (size_t I = 0; I < N; ++I) { BitcodeModule MB = IF.Mods[I]; - std::unique_ptr M = ExitOnErr( - MB.getLazyModule(Context, MaterializeMetadata, SetImporting)); - if (MaterializeMetadata) - ExitOnErr(M->materializeMetadata()); - else - ExitOnErr(M->materializeAll()); BitcodeLTOInfo LTOInfo = ExitOnErr(MB.getLTOInfo()); + + std::unique_ptr M; + if ((!DumpThinLTOIndexOnly) && (!LTOInfo.OnlyHasThinLTOIndex)) { + M = ExitOnErr( + MB.getLazyModule(Context, MaterializeMetadata, SetImporting)); + if (MaterializeMetadata) + ExitOnErr(M->materializeMetadata()); + else + ExitOnErr(M->materializeAll()); + } + std::unique_ptr Index; if (LTOInfo.HasSummary) Index = ExitOnErr(MB.getSummary()); @@ -233,7 +242,8 @@ // All that llvm-dis does is write the assembly to a file. if (!DontPrint) { - M->print(Out->os(), Annotator.get(), PreserveAssemblyUseListOrder); + if (M) + M->print(Out->os(), Annotator.get(), PreserveAssemblyUseListOrder); if (Index) Index->print(Out->os()); }