diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -58,7 +58,7 @@ static constexpr size_t size = std::tuple_size::value; - ASTFileSignature(BaseT S = {{0}}) : BaseT(std::move(S)) {} + constexpr ASTFileSignature(BaseT S = {{0}}) : BaseT(std::move(S)) {} explicit operator bool() const { return *this != BaseT({{0}}); } @@ -81,6 +81,12 @@ return Sentinel; } + static ASTFileSignature createDummy() { + ASTFileSignature Dummy; + Dummy.fill(0x00); + return Dummy; + } + template static ASTFileSignature create(InputIt First, InputIt Last) { assert(std::distance(First, Last) == size && diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -41,7 +41,7 @@ /// Version 4 of AST files also requires that the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. -const unsigned VERSION_MAJOR = 28; +const unsigned VERSION_MAJOR = 29; /// AST file minor version number supported by this version of /// Clang. diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -128,10 +128,17 @@ /// The module we're currently writing, if any. Module *WritingModule = nullptr; - /// The offset of the first bit inside the AST_BLOCK. + /// The byte range representing all the UNHASHED_CONTROL_BLOCK. + std::pair UnhashedControlBlockRange; + /// The bit offset of the AST block hash blob. + uint64_t ASTBlockHashOffset = 0; + /// The bit offset of the signature blob. + uint64_t SignatureOffset = 0; + + /// The bit offset of the first bit inside the AST_BLOCK. uint64_t ASTBlockStartOffset = 0; - /// The range representing all the AST_BLOCK. + /// The byte range representing all the AST_BLOCK. std::pair ASTBlockRange; /// The base directory for any relative paths we emit. @@ -495,12 +502,11 @@ StringRef isysroot); /// Write out the signature and diagnostic options, and return the signature. - ASTFileSignature writeUnhashedControlBlock(Preprocessor &PP, - ASTContext &Context); + void writeUnhashedControlBlock(Preprocessor &PP, ASTContext &Context); + ASTFileSignature backpatchSignature(); /// Calculate hash of the pcm content. - static std::pair - createSignature(StringRef AllBytes, StringRef ASTBlockBytes); + std::pair createSignature() const; void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts); void WriteSourceManagerBlock(SourceManager &SourceMgr, diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -4734,12 +4734,6 @@ ShouldFinalizePCM = true; return Success; - case UNHASHED_CONTROL_BLOCK_ID: - // This block is handled using look-ahead during ReadControlBlock. We - // shouldn't get here! - Error("malformed block record in AST file"); - return Failure; - default: if (llvm::Error Err = Stream.SkipBlock()) { Error(std::move(Err)); @@ -4859,13 +4853,18 @@ } switch ((UnhashedControlBlockRecordTypes)MaybeRecordType.get()) { case SIGNATURE: - if (F) - F->Signature = ASTFileSignature::create(Record.begin(), Record.end()); + if (F) { + F->Signature = ASTFileSignature::create(Blob.begin(), Blob.end()); + assert(F->Signature != ASTFileSignature::createDummy() && + "Dummy AST file signature not backpatched in ASTWriter."); + } break; case AST_BLOCK_HASH: - if (F) - F->ASTBlockHash = - ASTFileSignature::create(Record.begin(), Record.end()); + if (F) { + F->ASTBlockHash = ASTFileSignature::create(Blob.begin(), Blob.end()); + assert(F->ASTBlockHash != ASTFileSignature::createDummy() && + "Dummy AST block hash not backpatched in ASTWriter."); + } break; case DIAGNOSTIC_OPTIONS: { bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0; @@ -5167,9 +5166,12 @@ consumeError(MaybeRecord.takeError()); return ASTFileSignature(); } - if (SIGNATURE == MaybeRecord.get()) - return ASTFileSignature::create(Record.begin(), - Record.begin() + ASTFileSignature::size); + if (SIGNATURE == MaybeRecord.get()) { + auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end()); + assert(Signature != ASTFileSignature::createDummy() && + "Dummy AST file signature not backpatched in ASTWriter."); + return Signature; + } } } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1120,50 +1120,96 @@ } std::pair -ASTWriter::createSignature(StringRef AllBytes, StringRef ASTBlockBytes) { +ASTWriter::createSignature() const { + StringRef AllBytes(Buffer.data(), Buffer.size()); + llvm::SHA1 Hasher; - Hasher.update(ASTBlockBytes); + Hasher.update(AllBytes.slice(ASTBlockRange.first, ASTBlockRange.second)); ASTFileSignature ASTBlockHash = ASTFileSignature::create(Hasher.result()); - // Add the remaining bytes (i.e. bytes before the unhashed control block that - // are not part of the AST block). - Hasher.update( - AllBytes.take_front(ASTBlockBytes.bytes_end() - AllBytes.bytes_begin())); + // Add the remaining bytes: + // 1. Before the unhashed control block. + Hasher.update(AllBytes.slice(0, UnhashedControlBlockRange.first)); + // 2. Between the unhashed control block and the AST block. Hasher.update( - AllBytes.take_back(AllBytes.bytes_end() - ASTBlockBytes.bytes_end())); + AllBytes.slice(UnhashedControlBlockRange.second, ASTBlockRange.first)); + // 3. After the AST block. + Hasher.update(AllBytes.slice(ASTBlockRange.second, StringRef::npos)); ASTFileSignature Signature = ASTFileSignature::create(Hasher.result()); return std::make_pair(ASTBlockHash, Signature); } -ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP, - ASTContext &Context) { +ASTFileSignature ASTWriter::backpatchSignature() { + if (!WritingModule || + !PP->getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) + return {}; + + // For implicit modules, write the hash of the PCM as its signature. + ASTFileSignature Signature; + auto BackpatchSignatureAt = [&](const ASTFileSignature &S, uint64_t BitNo) { + using WordT = unsigned; + std::array Words; + static_assert(sizeof(Words) == sizeof(S)); + std::memcpy(Words.data(), S.data(), sizeof(ASTFileSignature)); + for (WordT Word : Words) { + Stream.BackpatchWord(BitNo, Word); + BitNo += sizeof(WordT) * 8; + } + }; + + ASTFileSignature ASTBlockHash; + std::tie(ASTBlockHash, Signature) = createSignature(); + + BackpatchSignatureAt(ASTBlockHash, ASTBlockHashOffset); + BackpatchSignatureAt(Signature, SignatureOffset); + + return Signature; +} + +void ASTWriter::writeUnhashedControlBlock(Preprocessor &PP, + ASTContext &Context) { using namespace llvm; // Flush first to prepare the PCM hash (signature). Stream.FlushToWord(); - auto StartOfUnhashedControl = Stream.GetCurrentBitNo() >> 3; + UnhashedControlBlockRange.first = Stream.GetCurrentBitNo() >> 3; // Enter the block and prepare to write records. RecordData Record; Stream.EnterSubblock(UNHASHED_CONTROL_BLOCK_ID, 5); // For implicit modules, write the hash of the PCM as its signature. - ASTFileSignature Signature; if (WritingModule && PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) { - ASTFileSignature ASTBlockHash; - auto ASTBlockStartByte = ASTBlockRange.first >> 3; - auto ASTBlockByteLength = (ASTBlockRange.second >> 3) - ASTBlockStartByte; - std::tie(ASTBlockHash, Signature) = createSignature( - StringRef(Buffer.begin(), StartOfUnhashedControl), - StringRef(Buffer.begin() + ASTBlockStartByte, ASTBlockByteLength)); - - Record.append(ASTBlockHash.begin(), ASTBlockHash.end()); - Stream.EmitRecord(AST_BLOCK_HASH, Record); + // At this point, we don't know the actual signature of the file or the AST + // block - we're only able to compute those at the end of the serialization + // process. Let's store dummy signatures for now, and replace them with the + // real ones later on. + // The bitstream VBR-encodes record elements, which makes backpatching them + // really difficult. Let's store the signatures as blobs instead - they are + // guaranteed to be word-aligned, and we control their format/encoding. + auto Dummy = ASTFileSignature::createDummy(); + SmallString<128> Blob{Dummy.begin(), Dummy.end()}; + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(AST_BLOCK_HASH)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); + unsigned ASTBlockHashAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); + + Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(SIGNATURE)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); + unsigned SignatureAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); + + Record.push_back(AST_BLOCK_HASH); + Stream.EmitRecordWithBlob(ASTBlockHashAbbrev, Record, Blob); + ASTBlockHashOffset = Stream.GetCurrentBitNo() - Blob.size() * 8; Record.clear(); - Record.append(Signature.begin(), Signature.end()); - Stream.EmitRecord(SIGNATURE, Record); + + Record.push_back(SIGNATURE); + Stream.EmitRecordWithBlob(SignatureAbbrev, Record, Blob); + SignatureOffset = Stream.GetCurrentBitNo() - Blob.size() * 8; Record.clear(); } @@ -1232,7 +1278,7 @@ // Leave the options block. Stream.ExitBlock(); - return Signature; + UnhashedControlBlockRange.second = Stream.GetCurrentBitNo() >> 3; } /// Write the control block. @@ -4690,6 +4736,7 @@ ASTContext &Context = SemaRef.Context; Preprocessor &PP = SemaRef.PP; + writeUnhashedControlBlock(PP, Context); collectNonAffectingInputFiles(); @@ -4838,7 +4885,7 @@ // Write the remaining AST contents. Stream.FlushToWord(); - ASTBlockRange.first = Stream.GetCurrentBitNo(); + ASTBlockRange.first = Stream.GetCurrentBitNo() >> 3; Stream.EnterSubblock(AST_BLOCK_ID, 5); ASTBlockStartOffset = Stream.GetCurrentBitNo(); @@ -5191,13 +5238,13 @@ Stream.EmitRecord(STATISTICS, Record); Stream.ExitBlock(); Stream.FlushToWord(); - ASTBlockRange.second = Stream.GetCurrentBitNo(); + ASTBlockRange.second = Stream.GetCurrentBitNo() >> 3; // Write the module file extension blocks. for (const auto &ExtWriter : ModuleFileExtensionWriters) WriteModuleFileExtension(SemaRef, *ExtWriter); - return writeUnhashedControlBlock(PP, Context); + return backpatchSignature(); } void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) { diff --git a/clang/lib/Serialization/GlobalModuleIndex.cpp b/clang/lib/Serialization/GlobalModuleIndex.cpp --- a/clang/lib/Serialization/GlobalModuleIndex.cpp +++ b/clang/lib/Serialization/GlobalModuleIndex.cpp @@ -15,6 +15,7 @@ #include "clang/Basic/FileManager.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Serialization/ASTBitCodes.h" +#include "clang/Serialization/ASTReader.h" #include "clang/Serialization/ModuleFile.h" #include "clang/Serialization/PCHContainerOperations.h" #include "llvm/ADT/DenseMap.h" @@ -697,9 +698,12 @@ } // Get Signature. - if (State == DiagnosticOptionsBlock && Code == SIGNATURE) - getModuleFileInfo(File).Signature = ASTFileSignature::create( - Record.begin(), Record.begin() + ASTFileSignature::size); + if (State == DiagnosticOptionsBlock && Code == SIGNATURE) { + auto Signature = ASTFileSignature::create(Blob.begin(), Blob.end()); + assert(Signature != ASTFileSignature::createDummy() && + "Dummy AST file signature not backpatched in ASTWriter."); + getModuleFileInfo(File).Signature = Signature; + } // We don't care about this record. } diff --git a/clang/test/Modules/ASTSignature.c b/clang/test/Modules/ASTSignature.c --- a/clang/test/Modules/ASTSignature.c +++ b/clang/test/Modules/ASTSignature.c @@ -1,6 +1,6 @@ // RUN: rm -rf %t -// RUN: %clang_cc1 -iquote %S/Inputs/ASTHash/ -fsyntax-only -fmodules \ -// RUN: -fimplicit-module-maps -fmodules-strict-context-hash \ +// RUN: %clang_cc1 -iquote %S/Inputs/ASTHash/ -fsyntax-only \ +// RUN: -fmodules -fimplicit-module-maps -fmodules-strict-context-hash \ // RUN: -fmodules-cache-path=%t -fdisable-module-hash %s // RUN: cp %t/MyHeader2.pcm %t1.pcm // RUN: rm -rf %t @@ -8,17 +8,18 @@ // RUN: -fmodules -fimplicit-module-maps -fmodules-strict-context-hash \ // RUN: -fmodules-cache-path=%t -fdisable-module-hash %s // RUN: cp %t/MyHeader2.pcm %t2.pcm -// RUN: llvm-bcanalyzer --dump --disable-histogram %t1.pcm > %t1.dump -// RUN: llvm-bcanalyzer --dump --disable-histogram %t2.pcm > %t2.dump +// RUN: llvm-bcanalyzer --dump --disable-histogram --show-binary-blobs %t1.pcm > %t1.dump +// RUN: llvm-bcanalyzer --dump --disable-histogram --show-binary-blobs %t2.pcm > %t2.dump // RUN: cat %t1.dump %t2.dump | FileCheck %s #include "my_header_2.h" my_int var = 42; -// CHECK: [[AST_BLOCK_HASH:]] -// CHECK: [[SIGNATURE:]] -// CHECK: [[AST_BLOCK_HASH]] -// CHECK-NOT: [[SIGNATURE]] -// The modules built by this test are designed to yield the same AST. If this -// test fails, it means that the AST block is has become non-relocatable. +// CHECK: blob data = '[[AST_BLOCK_HASH:.*]]' +// CHECK: blob data = '[[SIGNATURE:.*]]' +// CHECK: blob data = '[[AST_BLOCK_HASH]]' +// CHECK-NOT: blob data = '[[SIGNATURE]]' +// The modules built by this test are designed to yield the same AST but distinct AST files. +// If this test fails, it means that either the AST block has become non-relocatable, +// or the file signature stopped hashing some parts of the AST file.