diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -58,7 +58,7 @@ static constexpr size_t size = std::tuple_size::value; - ASTFileSignature(BaseT S = {{0}}) : BaseT(std::move(S)) {} + constexpr ASTFileSignature(BaseT S = {{0}}) : BaseT(std::move(S)) {} explicit operator bool() const { return *this != BaseT({{0}}); } diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -41,7 +41,7 @@ /// Version 4 of AST files also requires that the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. -const unsigned VERSION_MAJOR = 28; +const unsigned VERSION_MAJOR = 29; /// AST file minor version number supported by this version of /// Clang. diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -2394,6 +2394,9 @@ llvm::function_ref Visitor); bool isProcessingUpdateRecords() { return ProcessingUpdateRecords; } + + /// Read the AST file signature out of a binary blob. + static ASTFileSignature readSignature(const char *Blob); }; } // namespace clang diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -111,7 +111,7 @@ llvm::BitstreamWriter &Stream; /// The buffer associated with the bitstream. - const SmallVectorImpl &Buffer; + SmallVectorImpl &Buffer; /// The PCM manager which manages memory buffers for pcm files. InMemoryModuleCache &ModuleCache; @@ -128,10 +128,17 @@ /// The module we're currently writing, if any. Module *WritingModule = nullptr; + /// The byte range representing all the UNHASHED_CONTROL_BLOCK. + std::pair UnhashedControlBlockRange; + /// The byte offset of the AST block hash blob. + uint64_t ASTBlockHashOffset = 0; + /// The byte offset of the signature blob. + uint64_t SignatureOffset = 0; + /// The offset of the first bit inside the AST_BLOCK. uint64_t ASTBlockStartOffset = 0; - /// The range representing all the AST_BLOCK. + /// The byte range representing all the AST_BLOCK. std::pair ASTBlockRange; /// The base directory for any relative paths we emit. @@ -495,12 +502,11 @@ StringRef isysroot); /// Write out the signature and diagnostic options, and return the signature. - ASTFileSignature writeUnhashedControlBlock(Preprocessor &PP, - ASTContext &Context); + void writeUnhashedControlBlock(Preprocessor &PP, ASTContext &Context); + ASTFileSignature backpatchSignature(); /// Calculate hash of the pcm content. - static std::pair - createSignature(StringRef AllBytes, StringRef ASTBlockBytes); + std::pair createSignature() const; void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts); void WriteSourceManagerBlock(SourceManager &SourceMgr, diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -2678,6 +2678,14 @@ } } +ASTFileSignature ASTReader::readSignature(const char *Blob) { + ASTFileSignature Sig; + using namespace llvm::support; + for (unsigned I = 0; I < ASTFileSignature::size; ++I) + Sig[I] = endian::readNext(Blob); + return Sig; +} + ASTReader::ASTReadResult ASTReader::ReadControlBlock(ModuleFile &F, SmallVectorImpl &Loaded, @@ -4734,12 +4742,6 @@ ShouldFinalizePCM = true; return Success; - case UNHASHED_CONTROL_BLOCK_ID: - // This block is handled using look-ahead during ReadControlBlock. We - // shouldn't get here! - Error("malformed block record in AST file"); - return Failure; - default: if (llvm::Error Err = Stream.SkipBlock()) { Error(std::move(Err)); @@ -4860,12 +4862,11 @@ switch ((UnhashedControlBlockRecordTypes)MaybeRecordType.get()) { case SIGNATURE: if (F) - F->Signature = ASTFileSignature::create(Record.begin(), Record.end()); + F->Signature = ASTReader::readSignature(Blob.data()); break; case AST_BLOCK_HASH: if (F) - F->ASTBlockHash = - ASTFileSignature::create(Record.begin(), Record.end()); + F->ASTBlockHash = ASTReader::readSignature(Blob.data()); break; case DIAGNOSTIC_OPTIONS: { bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0; @@ -5168,8 +5169,7 @@ return ASTFileSignature(); } if (SIGNATURE == MaybeRecord.get()) - return ASTFileSignature::create(Record.begin(), - Record.begin() + ASTFileSignature::size); + return ASTReader::readSignature(Blob.data()); } } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1120,50 +1120,117 @@ } std::pair -ASTWriter::createSignature(StringRef AllBytes, StringRef ASTBlockBytes) { +ASTWriter::createSignature() const { + StringRef AllBytes(Buffer.data(), Buffer.size()); + llvm::SHA1 Hasher; - Hasher.update(ASTBlockBytes); + Hasher.update(AllBytes.slice(ASTBlockRange.first, ASTBlockRange.second)); ASTFileSignature ASTBlockHash = ASTFileSignature::create(Hasher.result()); - // Add the remaining bytes (i.e. bytes before the unhashed control block that - // are not part of the AST block). - Hasher.update( - AllBytes.take_front(ASTBlockBytes.bytes_end() - AllBytes.bytes_begin())); + // Add the remaining bytes: + // 1. Before the unhashed control block. + Hasher.update(AllBytes.slice(0, UnhashedControlBlockRange.first)); + // 2. Between the unhashed control block and the AST block. Hasher.update( - AllBytes.take_back(AllBytes.bytes_end() - ASTBlockBytes.bytes_end())); + AllBytes.slice(UnhashedControlBlockRange.second, ASTBlockRange.first)); + // 3. After the AST block. + Hasher.update(AllBytes.slice(ASTBlockRange.second, StringRef::npos)); ASTFileSignature Signature = ASTFileSignature::create(Hasher.result()); return std::make_pair(ASTBlockHash, Signature); } -ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP, - ASTContext &Context) { +static constexpr ASTFileSignature DummySignature{ + {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}}; +static constexpr ASTFileSignature DummyASTBlockHash{ + {2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2}}; + +static void writeSignature(const ASTFileSignature &Sig, + SmallVectorImpl &Vec) { + llvm::raw_svector_ostream Out(Vec); + using namespace llvm::support; + endian::Writer LE(Out, little); + for (uint8_t Byte : Sig) + LE.write(Byte); +} + +ASTFileSignature ASTWriter::backpatchSignature() { + ASTFileSignature Signature; + // For implicit modules, write the hash of the PCM as its signature. + if (WritingModule && + PP->getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) { + + auto ReadSignatureAt = [&](uint64_t Offset) { + return ASTReader::readSignature(Buffer.data() + Offset); + }; + auto WriteSignatureAt = [&](const ASTFileSignature &Sig, uint64_t Offset) { + SmallString<128> Out; + writeSignature(Sig, Out); + std::copy_n(Out.begin(), Out.size(), Buffer.begin() + Offset); + }; + + assert(ReadSignatureAt(ASTBlockHashOffset) == DummyASTBlockHash); + assert(ReadSignatureAt(SignatureOffset) == DummySignature); + + ASTFileSignature ASTBlockHash; + std::tie(ASTBlockHash, Signature) = createSignature(); + + WriteSignatureAt(ASTBlockHash, ASTBlockHashOffset); + WriteSignatureAt(Signature, SignatureOffset); + + assert(ReadSignatureAt(ASTBlockHashOffset) == ASTBlockHash); + assert(ReadSignatureAt(SignatureOffset) == Signature); + } + + return Signature; +} + +void ASTWriter::writeUnhashedControlBlock(Preprocessor &PP, + ASTContext &Context) { using namespace llvm; // Flush first to prepare the PCM hash (signature). Stream.FlushToWord(); - auto StartOfUnhashedControl = Stream.GetCurrentBitNo() >> 3; + UnhashedControlBlockRange.first = Stream.GetCurrentBitNo() >> 3; // Enter the block and prepare to write records. RecordData Record; Stream.EnterSubblock(UNHASHED_CONTROL_BLOCK_ID, 5); // For implicit modules, write the hash of the PCM as its signature. - ASTFileSignature Signature; if (WritingModule && PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) { - ASTFileSignature ASTBlockHash; - auto ASTBlockStartByte = ASTBlockRange.first >> 3; - auto ASTBlockByteLength = (ASTBlockRange.second >> 3) - ASTBlockStartByte; - std::tie(ASTBlockHash, Signature) = createSignature( - StringRef(Buffer.begin(), StartOfUnhashedControl), - StringRef(Buffer.begin() + ASTBlockStartByte, ASTBlockByteLength)); - - Record.append(ASTBlockHash.begin(), ASTBlockHash.end()); - Stream.EmitRecord(AST_BLOCK_HASH, Record); + // At this point, we don't know the actual signature of the file or the AST + // block - we're only able to compute those at the end of the serialization + // process. Let's store dummy signatures for now, and replace them with the + // real ones later on. + // The bitstream VBR-encodes record elements, which makes backpatching them + // really difficult. Let's store the signatures as blobs instead - they are + // guaranteed to be word-aligned, and we control their format/encoding. + SmallString<128> Blob; + + auto Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(AST_BLOCK_HASH)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); + unsigned ASTBlockHashAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); + + Abbrev = std::make_shared(); + Abbrev->Add(BitCodeAbbrevOp(SIGNATURE)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); + unsigned SignatureAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); + + Record.push_back(AST_BLOCK_HASH); + writeSignature(DummyASTBlockHash, Blob); + Stream.EmitRecordWithBlob(ASTBlockHashAbbrev, Record, Blob); + ASTBlockHashOffset = Buffer.size() - Blob.size(); + Blob.clear(); Record.clear(); - Record.append(Signature.begin(), Signature.end()); - Stream.EmitRecord(SIGNATURE, Record); + + Record.push_back(SIGNATURE); + writeSignature(DummySignature, Blob); + Stream.EmitRecordWithBlob(SignatureAbbrev, Record, Blob); + SignatureOffset = Buffer.size() - Blob.size(); + Blob.clear(); Record.clear(); } @@ -1232,7 +1299,7 @@ // Leave the options block. Stream.ExitBlock(); - return Signature; + UnhashedControlBlockRange.second = Stream.GetCurrentBitNo() >> 3; } /// Write the control block. @@ -4690,6 +4757,7 @@ ASTContext &Context = SemaRef.Context; Preprocessor &PP = SemaRef.PP; + writeUnhashedControlBlock(PP, Context); collectNonAffectingInputFiles(); @@ -4838,7 +4906,7 @@ // Write the remaining AST contents. Stream.FlushToWord(); - ASTBlockRange.first = Stream.GetCurrentBitNo(); + ASTBlockRange.first = Stream.GetCurrentBitNo() >> 3; Stream.EnterSubblock(AST_BLOCK_ID, 5); ASTBlockStartOffset = Stream.GetCurrentBitNo(); @@ -5191,13 +5259,13 @@ Stream.EmitRecord(STATISTICS, Record); Stream.ExitBlock(); Stream.FlushToWord(); - ASTBlockRange.second = Stream.GetCurrentBitNo(); + ASTBlockRange.second = Stream.GetCurrentBitNo() >> 3; // Write the module file extension blocks. for (const auto &ExtWriter : ModuleFileExtensionWriters) WriteModuleFileExtension(SemaRef, *ExtWriter); - return writeUnhashedControlBlock(PP, Context); + return backpatchSignature(); } void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) { diff --git a/clang/lib/Serialization/GlobalModuleIndex.cpp b/clang/lib/Serialization/GlobalModuleIndex.cpp --- a/clang/lib/Serialization/GlobalModuleIndex.cpp +++ b/clang/lib/Serialization/GlobalModuleIndex.cpp @@ -15,6 +15,7 @@ #include "clang/Basic/FileManager.h" #include "clang/Lex/HeaderSearch.h" #include "clang/Serialization/ASTBitCodes.h" +#include "clang/Serialization/ASTReader.h" #include "clang/Serialization/ModuleFile.h" #include "clang/Serialization/PCHContainerOperations.h" #include "llvm/ADT/DenseMap.h" @@ -698,8 +699,7 @@ // Get Signature. if (State == DiagnosticOptionsBlock && Code == SIGNATURE) - getModuleFileInfo(File).Signature = ASTFileSignature::create( - Record.begin(), Record.begin() + ASTFileSignature::size); + getModuleFileInfo(File).Signature = ASTReader::readSignature(Blob.data()); // We don't care about this record. } diff --git a/clang/test/Modules/ASTSignature.c b/clang/test/Modules/ASTSignature.c --- a/clang/test/Modules/ASTSignature.c +++ b/clang/test/Modules/ASTSignature.c @@ -1,6 +1,6 @@ // RUN: rm -rf %t -// RUN: %clang_cc1 -iquote %S/Inputs/ASTHash/ -fsyntax-only -fmodules \ -// RUN: -fimplicit-module-maps -fmodules-strict-context-hash \ +// RUN: %clang_cc1 -iquote %S/Inputs/ASTHash/ -fsyntax-only \ +// RUN: -fmodules -fimplicit-module-maps -fmodules-strict-context-hash \ // RUN: -fmodules-cache-path=%t -fdisable-module-hash %s // RUN: cp %t/MyHeader2.pcm %t1.pcm // RUN: rm -rf %t @@ -8,17 +8,18 @@ // RUN: -fmodules -fimplicit-module-maps -fmodules-strict-context-hash \ // RUN: -fmodules-cache-path=%t -fdisable-module-hash %s // RUN: cp %t/MyHeader2.pcm %t2.pcm -// RUN: llvm-bcanalyzer --dump --disable-histogram %t1.pcm > %t1.dump -// RUN: llvm-bcanalyzer --dump --disable-histogram %t2.pcm > %t2.dump +// RUN: llvm-bcanalyzer --dump --disable-histogram --show-binary-blobs %t1.pcm > %t1.dump +// RUN: llvm-bcanalyzer --dump --disable-histogram --show-binary-blobs %t2.pcm > %t2.dump // RUN: cat %t1.dump %t2.dump | FileCheck %s #include "my_header_2.h" my_int var = 42; -// CHECK: [[AST_BLOCK_HASH:]] -// CHECK: [[SIGNATURE:]] -// CHECK: [[AST_BLOCK_HASH]] -// CHECK-NOT: [[SIGNATURE]] -// The modules built by this test are designed to yield the same AST. If this -// test fails, it means that the AST block is has become non-relocatable. +// CHECK: blob data = '[[AST_BLOCK_HASH:.*]]' +// CHECK: blob data = '[[SIGNATURE:.*]]' +// CHECK: blob data = '[[AST_BLOCK_HASH]]' +// CHECK-NOT: blob data = '[[SIGNATURE]]' +// The modules built by this test are designed to yield the same AST but distinct AST files. +// If this test fails, it means that either the AST block has become non-relocatable, +// or the file signature stopped hashing some parts of the AST file.