diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -41,7 +41,7 @@ /// Version 4 of AST files also requires that the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. -const unsigned VERSION_MAJOR = 10; +const unsigned VERSION_MAJOR = 11; /// AST file minor version number supported by this version of /// Clang. @@ -242,14 +242,16 @@ /// Raw source location. unsigned Loc = 0; - /// Offset in the AST file. Keep structure alignment 32-bit and avoid - /// padding gap because undefined value in the padding affects AST hash. + /// Offset relative to the start of the DECLTYPES_BLOCK block. Keep + /// structure alignment 32-bit and avoid padding gap because undefined + /// value in the padding affects AST hash. UnderalignedInt64 BitOffset; DeclOffset() = default; - DeclOffset(SourceLocation Loc, uint64_t BitOffset) { + DeclOffset(SourceLocation Loc, uint64_t BitOffset, + uint64_t DeclTypesBlockStartOffset) { setLocation(Loc); - setBitOffset(BitOffset); + setBitOffset(BitOffset, DeclTypesBlockStartOffset); } void setLocation(SourceLocation L) { @@ -260,12 +262,13 @@ return SourceLocation::getFromRawEncoding(Loc); } - void setBitOffset(uint64_t Offset) { - BitOffset.setBitOffset(Offset); + void setBitOffset(uint64_t Offset, + const uint64_t DeclTypesBlockStartOffset) { + BitOffset.setBitOffset(Offset - DeclTypesBlockStartOffset); } - uint64_t getBitOffset() const { - return BitOffset.getBitOffset(); + uint64_t getBitOffset(const uint64_t DeclTypesBlockStartOffset) const { + return BitOffset.getBitOffset() + DeclTypesBlockStartOffset; } }; @@ -394,6 +397,9 @@ /// Record code for the signature that identifiers this AST file. SIGNATURE = 1, + /// Record code for the content hash of the AST block. 
+ AST_BLOCK_HASH, + /// Record code for the diagnostic options table. DIAGNOSTIC_OPTIONS, diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -1890,7 +1890,8 @@ /// ReadBlockAbbrevs - Enter a subblock of the specified BlockID with the /// specified cursor. Read the abbreviations that are at the top of the block /// and then leave the cursor pointing into the block. - static bool ReadBlockAbbrevs(llvm::BitstreamCursor &Cursor, unsigned BlockID); + static bool ReadBlockAbbrevs(llvm::BitstreamCursor &Cursor, unsigned BlockID, + uint64_t *StartOfBlockOffset = nullptr); /// Finds all the visible declarations with a given name. /// The current implementation of this method just loads the entire diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -27,6 +27,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -137,6 +138,12 @@ /// The module we're currently writing, if any. Module *WritingModule = nullptr; + /// The offset of the first bit inside the AST_BLOCK. + uint64_t ASTBlockStartOffset = 0; + + /// The range representing all the AST_BLOCK. + std::pair ASTBlockRange; + /// The base directory for any relative paths we emit. std::string BaseDirectory; @@ -206,6 +213,10 @@ /// the declaration's ID. std::vector DeclOffsets; + /// The offset of the DECLTYPES_BLOCK. The offsets in DeclOffsets + /// are relative to this value. + uint64_t DeclTypesBlockStartOffset = 0; + /// Sorted (by file offset) vector of pairs of file offset/DeclID. 
using LocDeclIDsTy = SmallVector, 64>; @@ -441,7 +452,7 @@ /// A list of the module file extension writers. std::vector> - ModuleFileExtensionWriters; + ModuleFileExtensionWriters; /// Retrieve or create a submodule ID for this module. unsigned getSubmoduleID(Module *Mod); @@ -458,7 +469,8 @@ ASTContext &Context); /// Calculate hash of the pcm content. - static ASTFileSignature createSignature(StringRef Bytes); + static std::pair + createSignature(StringRef AllBytes, StringRef ASTBlockBytes); void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts, bool Modules); diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -168,6 +168,10 @@ /// and modification time to identify this particular file. ASTFileSignature Signature; + /// The signature of the AST block of the module file, this can be used to + /// unique module files based on AST contents. + ASTFileSignature ASTBlockHash; + /// Whether this module has been directly imported by the /// user. bool DirectlyImported = false; @@ -185,6 +189,9 @@ /// The global bit offset (or base) of this module uint64_t GlobalBitOffset = 0; + /// The bit offset of the AST block of this module. + uint64_t ASTBlockStartOffset = 0; + /// The serialized bitstream data for this file. StringRef Data; @@ -242,6 +249,9 @@ /// Cursor used to read source location entries. llvm::BitstreamCursor SLocEntryCursor; + /// The bit offset to the start of the SOURCE_MANAGER_BLOCK. + uint64_t SourceManagerBlockStartOffset = 0; + /// The number of source location entries in this AST file. unsigned LocalNumSLocEntries = 0; @@ -409,11 +419,14 @@ // === Declarations === - /// DeclsCursor - This is a cursor to the start of the DECLS_BLOCK block. It - /// has read all the abbreviations at the start of the block and is ready to - /// jump around with these in context. 
+ /// DeclsCursor - This is a cursor to the start of the DECLTYPES_BLOCK block. + /// It has read all the abbreviations at the start of the block and is ready + /// to jump around with these in context. llvm::BitstreamCursor DeclsCursor; + /// The offset to the start of the DECLTYPES_BLOCK block. + uint64_t DeclsBlockStartOffset = 0; + /// The number of declarations in this AST file. unsigned LocalNumDecls = 0; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1336,6 +1336,7 @@ Error(std::move(Err)); return true; } + F.SourceManagerBlockStartOffset = SLocEntryCursor.GetCurrentBitNo(); RecordData Record; while (true) { @@ -1628,13 +1629,17 @@ /// Enter a subblock of the specified BlockID with the specified cursor. Read /// the abbreviations that are at the top of the block and then leave the cursor /// pointing into the block. -bool ASTReader::ReadBlockAbbrevs(BitstreamCursor &Cursor, unsigned BlockID) { +bool ASTReader::ReadBlockAbbrevs(BitstreamCursor &Cursor, unsigned BlockID, + uint64_t *StartOfBlockOffset) { if (llvm::Error Err = Cursor.EnterSubBlock(BlockID)) { // FIXME this drops errors on the floor. consumeError(std::move(Err)); return true; } + if (StartOfBlockOffset) + *StartOfBlockOffset = Cursor.GetCurrentBitNo(); + while (true) { uint64_t Offset = Cursor.GetCurrentBitNo(); Expected MaybeCode = Cursor.ReadCode(); @@ -2933,6 +2938,7 @@ Error(std::move(Err)); return Failure; } + F.ASTBlockStartOffset = Stream.GetCurrentBitNo(); // Read all of the records and blocks for the AST file. 
RecordData Record; @@ -2973,7 +2979,8 @@ Error(std::move(Err)); return Failure; } - if (ReadBlockAbbrevs(F.DeclsCursor, DECLTYPES_BLOCK_ID)) { + if (ReadBlockAbbrevs(F.DeclsCursor, DECLTYPES_BLOCK_ID, + &F.DeclsBlockStartOffset)) { Error("malformed block record in AST file"); return Failure; } @@ -3377,7 +3384,7 @@ F.SLocEntryOffsets = (const uint32_t *)Blob.data(); F.LocalNumSLocEntries = Record[0]; unsigned SLocSpaceSize = Record[1]; - F.SLocEntryOffsetsBase = Record[2]; + F.SLocEntryOffsetsBase = Record[2] + F.SourceManagerBlockStartOffset; std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) = SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries, SLocSpaceSize); @@ -3696,7 +3703,7 @@ F.MacroOffsets = (const uint32_t *)Blob.data(); F.LocalNumMacros = Record[0]; unsigned LocalBaseMacroID = Record[1]; - F.MacroOffsetsBase = Record[2]; + F.MacroOffsetsBase = Record[2] + F.ASTBlockStartOffset; F.BaseMacroID = getTotalNumMacros(); if (F.LocalNumMacros > 0) { @@ -3837,17 +3844,18 @@ while (Data < DataEnd) { // FIXME: Looking up dependency modules by filename is horrible. Let's - // start fixing this with prebuilt and explicit modules and see how it - // goes... + // start fixing this with prebuilt, explicit and implicit modules and see + // how it goes... using namespace llvm::support; ModuleKind Kind = static_cast( endian::readNext(Data)); uint16_t Len = endian::readNext(Data); StringRef Name = StringRef((const char*)Data, Len); Data += Len; - ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule - ? ModuleMgr.lookupByModuleName(Name) - : ModuleMgr.lookupByFileName(Name)); + ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule || + Kind == MK_ImplicitModule + ? 
ModuleMgr.lookupByModuleName(Name) + : ModuleMgr.lookupByFileName(Name)); if (!OM) { std::string Msg = "SourceLocation remap refers to unknown module, cannot find "; @@ -4736,6 +4744,11 @@ if (F) F->Signature = ASTFileSignature::create(Record.begin(), Record.end()); break; + case AST_BLOCK_HASH: + if (F) + F->ASTBlockHash = + ASTFileSignature::create(Record.begin(), Record.end()); + break; case DIAGNOSTIC_OPTIONS: { bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0; if (Listener && ValidateDiagnosticOptions && @@ -6350,7 +6363,8 @@ assert(I != GlobalTypeMap.end() && "Corrupted global type map"); ModuleFile *M = I->second; return RecordLocation( - M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset()); + M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset() + + M->DeclsBlockStartOffset); } static llvm::Optional getTypeClassForCode(TypeCode code) { diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -2869,7 +2869,7 @@ const DeclOffset &DOffs = M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS]; Loc = TranslateSourceLocation(*M, DOffs.getLocation()); - return RecordLocation(M, DOffs.getBitOffset()); + return RecordLocation(M, DOffs.getBitOffset(M->DeclsBlockStartOffset)); } ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t GlobalOffset) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -10,14 +10,12 @@ // //===----------------------------------------------------------------------===// -#include "clang/AST/OpenMPClause.h" -#include "clang/Serialization/ASTRecordWriter.h" #include "ASTCommon.h" #include "ASTReaderInternals.h" #include "MultiOnDiskHashTable.h" -#include "clang/AST/AbstractTypeWriter.h" #include "clang/AST/ASTContext.h" #include 
"clang/AST/ASTUnresolvedSet.h" +#include "clang/AST/AbstractTypeWriter.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" @@ -31,6 +29,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/LambdaCapture.h" #include "clang/AST/NestedNameSpecifier.h" +#include "clang/AST/OpenMPClause.h" #include "clang/AST/RawCommentList.h" #include "clang/AST/TemplateName.h" #include "clang/AST/Type.h" @@ -65,7 +64,9 @@ #include "clang/Sema/ObjCMethodList.h" #include "clang/Sema/Sema.h" #include "clang/Sema/Weak.h" +#include "clang/Serialization/ASTBitCodes.h" #include "clang/Serialization/ASTReader.h" +#include "clang/Serialization/ASTRecordWriter.h" #include "clang/Serialization/InMemoryModuleCache.h" #include "clang/Serialization/ModuleFile.h" #include "clang/Serialization/ModuleFileExtension.h" @@ -961,6 +962,7 @@ BLOCK(UNHASHED_CONTROL_BLOCK); RECORD(SIGNATURE); + RECORD(AST_BLOCK_HASH); RECORD(DIAGNOSTIC_OPTIONS); RECORD(DIAG_PRAGMA_MAPPINGS); @@ -1026,13 +1028,23 @@ return Filename + Pos; } -ASTFileSignature ASTWriter::createSignature(StringRef Bytes) { - // Calculate the hash till start of UNHASHED_CONTROL_BLOCK. +std::pair +ASTWriter::createSignature(StringRef AllBytes, StringRef ASTBlockBytes) { llvm::SHA1 Hasher; - Hasher.update(ArrayRef(Bytes.bytes_begin(), Bytes.size())); + Hasher.update(ASTBlockBytes); auto Hash = Hasher.result(); + ASTFileSignature ASTBlockHash = ASTFileSignature::create(Hash); + + // Add the remaining bytes (i.e. bytes before the unhashed control block that + // are not part of the AST block). 
+ Hasher.update( + AllBytes.take_front(ASTBlockBytes.bytes_begin() - AllBytes.bytes_begin())); + Hasher.update( + AllBytes.take_back(AllBytes.bytes_end() - ASTBlockBytes.bytes_end())); + Hash = Hasher.result(); + ASTFileSignature Signature = ASTFileSignature::create(Hash); - return ASTFileSignature::create(Hash); + return std::make_pair(ASTBlockHash, Signature); } ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP, @@ -1049,7 +1061,16 @@ ASTFileSignature Signature; if (WritingModule && PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) { - Signature = createSignature(StringRef(Buffer.begin(), StartOfUnhashedControl)); + ASTFileSignature ASTBlockHash; + auto ASTBlockStartByte = ASTBlockRange.first >> 3; + auto ASTBlockByteLength = (ASTBlockRange.second >> 3) - ASTBlockStartByte; + std::tie(ASTBlockHash, Signature) = createSignature( + StringRef(Buffer.begin(), StartOfUnhashedControl), + StringRef(Buffer.begin() + ASTBlockStartByte, ASTBlockByteLength)); + + Record.append(ASTBlockHash.begin(), ASTBlockHash.end()); + Stream.EmitRecord(AST_BLOCK_HASH, Record); + Record.clear(); Record.append(Signature.begin(), Signature.end()); Stream.EmitRecord(SIGNATURE, Record); Record.clear(); @@ -1901,6 +1922,7 @@ // Enter the source manager block. Stream.EnterSubblock(SOURCE_MANAGER_BLOCK_ID, 4); + const uint64_t SourceManagerBlockOffset = Stream.GetCurrentBitNo(); // Abbreviations for the various kinds of source-location entries. 
unsigned SLocFileAbbrv = CreateSLocFileAbbrev(Stream); @@ -2041,7 +2063,7 @@ RecordData::value_type Record[] = { SOURCE_LOCATION_OFFSETS, SLocEntryOffsets.size(), SourceMgr.getNextLocalOffset() - 1 /* skip dummy */, - SLocEntryOffsetsBase}; + SLocEntryOffsetsBase - SourceManagerBlockOffset}; Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record, bytes(SLocEntryOffsets)); } @@ -2320,7 +2342,7 @@ { RecordData::value_type Record[] = {MACRO_OFFSET, MacroOffsets.size(), FirstMacroID - NUM_PREDEF_MACRO_IDS, - MacroOffsetsBase}; + MacroOffsetsBase - ASTBlockStartOffset}; Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets)); } } @@ -2834,7 +2856,7 @@ assert(Idx.getIndex() >= FirstTypeID && "Re-writing a type from a prior AST"); // Emit the type's representation. - uint64_t Offset = ASTTypeWriter(*this).write(T); + uint64_t Offset = ASTTypeWriter(*this).write(T) - DeclTypesBlockStartOffset; // Record the offset for this type. unsigned Index = Idx.getIndex() - FirstTypeID; @@ -4544,7 +4566,10 @@ WriteControlBlock(PP, Context, isysroot, OutputFile); // Write the remaining AST contents. + Stream.FlushToWord(); + ASTBlockRange.first = Stream.GetCurrentBitNo(); Stream.EnterSubblock(AST_BLOCK_ID, 5); + ASTBlockStartOffset = Stream.GetCurrentBitNo(); // This is so that older clang versions, before the introduction // of the control block, can read and reject the newer PCH format. @@ -4675,9 +4700,9 @@ // c++-base-specifiers-id:i32 // type-id:i32) // - // module-kind is the ModuleKind enum value. If it is MK_PrebuiltModule or - // MK_ExplicitModule, then the module-name is the module name. Otherwise, - // it is the module file name. + // module-kind is the ModuleKind enum value. If it is MK_PrebuiltModule, + // MK_ExplicitModule or MK_ImplicitModule, then the module-name is the + // module name. Otherwise, it is the module file name. 
auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(MODULE_OFFSET_MAP)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); @@ -4690,10 +4715,7 @@ endian::Writer LE(Out, little); LE.write(static_cast(M.Kind)); - StringRef Name = - M.Kind == MK_PrebuiltModule || M.Kind == MK_ExplicitModule - ? M.ModuleName - : M.FileName; + StringRef Name = M.isModule() ? M.ModuleName : M.FileName; LE.write(Name.size()); Out.write(Name.data(), Name.size()); @@ -4737,6 +4759,7 @@ // Keep writing types, declarations, and declaration update records // until we've emitted all of them. Stream.EnterSubblock(DECLTYPES_BLOCK_ID, /*bits for abbreviations*/5); + DeclTypesBlockStartOffset = Stream.GetCurrentBitNo(); WriteTypeAbbrevs(); WriteDeclAbbrevs(); do { @@ -4905,6 +4928,8 @@ NumStatements, NumMacros, NumLexicalDeclContexts, NumVisibleDeclContexts}; Stream.EmitRecord(STATISTICS, Record); Stream.ExitBlock(); + Stream.FlushToWord(); + ASTBlockRange.second = Stream.GetCurrentBitNo(); // Write the module file extension blocks. for (const auto &ExtWriter : ModuleFileExtensionWriters) diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -2432,12 +2432,12 @@ SourceLocation Loc = D->getLocation(); unsigned Index = ID - FirstDeclID; if (DeclOffsets.size() == Index) - DeclOffsets.emplace_back(Loc, Offset); + DeclOffsets.emplace_back(Loc, Offset, DeclTypesBlockStartOffset); else if (DeclOffsets.size() < Index) { // FIXME: Can/should this happen? 
DeclOffsets.resize(Index+1); DeclOffsets[Index].setLocation(Loc); - DeclOffsets[Index].setBitOffset(Offset); + DeclOffsets[Index].setBitOffset(Offset, DeclTypesBlockStartOffset); } else { llvm_unreachable("declarations should be emitted in ID order"); } diff --git a/clang/test/Modules/ASTSignature.c b/clang/test/Modules/ASTSignature.c new file mode 100644 --- /dev/null +++ b/clang/test/Modules/ASTSignature.c @@ -0,0 +1,24 @@ +// RUN: rm -rf %t +// RUN: %clang_cc1 -iquote %S/Inputs/ASTHash/ -fsyntax-only -fmodules \ +// RUN: -fimplicit-module-maps -fmodules-strict-context-hash \ +// RUN: -fmodules-cache-path=%t -fdisable-module-hash %s +// RUN: cp %t/MyHeader2.pcm %t1.pcm +// RUN: rm -rf %t +// RUN: %clang_cc1 -iquote "/dev/null" -iquote %S/Inputs/ASTHash/ -fsyntax-only \ +// RUN: -fmodules -fimplicit-module-maps -fmodules-strict-context-hash \ +// RUN: -fmodules-cache-path=%t -fdisable-module-hash %s +// RUN: cp %t/MyHeader2.pcm %t2.pcm +// RUN: llvm-bcanalyzer --dump --disable-histogram %t1.pcm > %t1.dump +// RUN: llvm-bcanalyzer --dump --disable-histogram %t2.pcm > %t2.dump +// RUN: cat %t1.dump %t2.dump | FileCheck %s + +#include "my_header_2.h" + +my_int var = 42; + +// CHECK: [[AST_BLOCK_HASH:<AST_BLOCK_HASH .*>]] +// CHECK: [[SIGNATURE:<SIGNATURE .*>]] +// CHECK: [[AST_BLOCK_HASH]] +// CHECK-NOT: [[SIGNATURE]] +// The modules built by this test are designed to yield the same AST. If this +// test fails, it means that the AST block has become non-relocatable. 
diff --git a/clang/test/Modules/Inputs/ASTHash/module.modulemap b/clang/test/Modules/Inputs/ASTHash/module.modulemap new file mode 100644 --- /dev/null +++ b/clang/test/Modules/Inputs/ASTHash/module.modulemap @@ -0,0 +1,8 @@ +module MyHeader1 { + header "my_header_1.h" +} + +module MyHeader2 { + header "my_header_2.h" + export * +} diff --git a/clang/test/Modules/Inputs/ASTHash/my_header_1.h b/clang/test/Modules/Inputs/ASTHash/my_header_1.h new file mode 100644 --- /dev/null +++ b/clang/test/Modules/Inputs/ASTHash/my_header_1.h @@ -0,0 +1 @@ +typedef int my_int; diff --git a/clang/test/Modules/Inputs/ASTHash/my_header_2.h b/clang/test/Modules/Inputs/ASTHash/my_header_2.h new file mode 100644 --- /dev/null +++ b/clang/test/Modules/Inputs/ASTHash/my_header_2.h @@ -0,0 +1,3 @@ +#include "my_header_1.h" + +extern my_int var;