diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -41,7 +41,7 @@ /// Version 4 of AST files also requires that the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. - const unsigned VERSION_MAJOR = 10; + const unsigned VERSION_MAJOR = 11; /// AST file minor version number supported by this version of /// Clang. @@ -242,14 +242,16 @@ /// Raw source location. unsigned Loc = 0; - /// Offset in the AST file. Keep structure alignment 32-bit and avoid - /// padding gap because undefined value in the padding affects AST hash. + /// Offset relative to the start of the AST block. Keep structure + /// alignment 32-bit and avoid padding gap because undefined value in the + /// padding affects AST hash. UnderalignedInt64 BitOffset; DeclOffset() = default; - DeclOffset(SourceLocation Loc, uint64_t BitOffset) { + DeclOffset(SourceLocation Loc, uint64_t BitOffset, + uint64_t ASTBlockBitOffset) { setLocation(Loc); - setBitOffset(BitOffset); + setBitOffset(BitOffset, ASTBlockBitOffset); } void setLocation(SourceLocation L) { @@ -260,12 +262,12 @@ return SourceLocation::getFromRawEncoding(Loc); } - void setBitOffset(uint64_t Offset) { - BitOffset.setBitOffset(Offset); + void setBitOffset(uint64_t Offset, const uint64_t ASTBlockBitOffset) { + BitOffset.setBitOffset(Offset - ASTBlockBitOffset); } - uint64_t getBitOffset() const { - return BitOffset.getBitOffset(); + uint64_t getBitOffset(const uint64_t ASTBlockBitOffset) const { + return BitOffset.getBitOffset() + ASTBlockBitOffset; } }; @@ -394,6 +396,9 @@ /// Record code for the signature that identifiers this AST file. SIGNATURE = 1, + /// Record code for the content hash of the AST block. + AST_BLOCK_HASH, + /// Record code for the diagnostic options table. 
DIAGNOSTIC_OPTIONS, diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -27,6 +27,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -441,7 +442,9 @@ /// A list of the module file extension writers. std::vector> - ModuleFileExtensionWriters; + ModuleFileExtensionWriters; + + std::pair ASTBlockRange; /// Retrieve or create a submodule ID for this module. unsigned getSubmoduleID(Module *Mod); @@ -458,7 +461,8 @@ ASTContext &Context); /// Calculate hash of the pcm content. - static ASTFileSignature createSignature(StringRef Bytes); + static std::pair + createSignature(StringRef AllBytes, StringRef ASTBlockBytes); void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts, bool Modules); diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -168,6 +168,10 @@ /// and modification time to identify this particular file. ASTFileSignature Signature; + /// The signature of the AST block of the module file, this can be used to + /// unique module files based on AST contents. + ASTFileSignature ASTBlockHash; + /// Whether this module has been directly imported by the /// user. bool DirectlyImported = false; @@ -185,6 +189,9 @@ /// The global bit offset (or base) of this module uint64_t GlobalBitOffset = 0; + /// The bit offset of the AST block of this module + uint64_t ASTBlockBitOffset = 0; + /// The serialized bitstream data for this file. 
StringRef Data; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3377,7 +3377,7 @@ F.SLocEntryOffsets = (const uint32_t *)Blob.data(); F.LocalNumSLocEntries = Record[0]; unsigned SLocSpaceSize = Record[1]; - F.SLocEntryOffsetsBase = Record[2]; + F.SLocEntryOffsetsBase = Record[2] + F.ASTBlockBitOffset; std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) = SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries, SLocSpaceSize); @@ -3696,7 +3696,7 @@ F.MacroOffsets = (const uint32_t *)Blob.data(); F.LocalNumMacros = Record[0]; unsigned LocalBaseMacroID = Record[1]; - F.MacroOffsetsBase = Record[2]; + F.MacroOffsetsBase = Record[2] + F.ASTBlockBitOffset; F.BaseMacroID = getTotalNumMacros(); if (F.LocalNumMacros > 0) { @@ -3837,17 +3837,18 @@ while (Data < DataEnd) { // FIXME: Looking up dependency modules by filename is horrible. Let's - // start fixing this with prebuilt and explicit modules and see how it - // goes... + // start fixing this with prebuilt, explicit and implicit modules and see + // how it goes... using namespace llvm::support; ModuleKind Kind = static_cast( endian::readNext(Data)); uint16_t Len = endian::readNext(Data); StringRef Name = StringRef((const char*)Data, Len); Data += Len; - ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule - ? ModuleMgr.lookupByModuleName(Name) - : ModuleMgr.lookupByFileName(Name)); + ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule || + Kind == MK_ImplicitModule + ? ModuleMgr.lookupByModuleName(Name) + : ModuleMgr.lookupByFileName(Name)); if (!OM) { std::string Msg = "SourceLocation remap refers to unknown module, cannot find "; @@ -4552,6 +4553,7 @@ // This is used for compatibility with older PCH formats. 
bool HaveReadControlBlock = false; while (true) { + const uint64_t CurrentStreamOffset = Stream.GetCurrentBitNo(); Expected MaybeEntry = Stream.advance(); if (!MaybeEntry) { Error(MaybeEntry.takeError()); @@ -4606,6 +4608,7 @@ return VersionMismatch; } + F.ASTBlockBitOffset = CurrentStreamOffset; // Record that we've loaded this module. Loaded.push_back(ImportedModule(M, ImportedBy, ImportLoc)); ShouldFinalizePCM = true; @@ -4736,6 +4739,10 @@ if (F) std::copy(Record.begin(), Record.end(), F->Signature.data()); break; + case AST_BLOCK_HASH: + if (F) + std::copy(Record.begin(), Record.end(), F->ASTBlockHash.data()); + break; case DIAGNOSTIC_OPTIONS: { bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0; if (Listener && ValidateDiagnosticOptions && @@ -6350,7 +6357,8 @@ assert(I != GlobalTypeMap.end() && "Corrupted global type map"); ModuleFile *M = I->second; return RecordLocation( - M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset()); + M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset() + + M->ASTBlockBitOffset); } static llvm::Optional getTypeClassForCode(TypeCode code) { diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -2869,7 +2869,7 @@ const DeclOffset &DOffs = M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS]; Loc = TranslateSourceLocation(*M, DOffs.getLocation()); - return RecordLocation(M, DOffs.getBitOffset()); + return RecordLocation(M, DOffs.getBitOffset(M->ASTBlockBitOffset)); } ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t GlobalOffset) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -10,14 +10,12 @@ // //===----------------------------------------------------------------------===// -#include "clang/AST/OpenMPClause.h" 
-#include "clang/Serialization/ASTRecordWriter.h" #include "ASTCommon.h" #include "ASTReaderInternals.h" #include "MultiOnDiskHashTable.h" -#include "clang/AST/AbstractTypeWriter.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTUnresolvedSet.h" +#include "clang/AST/AbstractTypeWriter.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" @@ -31,6 +29,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/LambdaCapture.h" #include "clang/AST/NestedNameSpecifier.h" +#include "clang/AST/OpenMPClause.h" #include "clang/AST/RawCommentList.h" #include "clang/AST/TemplateName.h" #include "clang/AST/Type.h" @@ -65,7 +64,9 @@ #include "clang/Sema/ObjCMethodList.h" #include "clang/Sema/Sema.h" #include "clang/Sema/Weak.h" +#include "clang/Serialization/ASTBitCodes.h" #include "clang/Serialization/ASTReader.h" +#include "clang/Serialization/ASTRecordWriter.h" #include "clang/Serialization/InMemoryModuleCache.h" #include "clang/Serialization/ModuleFile.h" #include "clang/Serialization/ModuleFileExtension.h" @@ -961,6 +962,7 @@ BLOCK(UNHASHED_CONTROL_BLOCK); RECORD(SIGNATURE); + RECORD(AST_BLOCK_HASH); RECORD(DIAGNOSTIC_OPTIONS); RECORD(DIAG_PRAGMA_MAPPINGS); @@ -1026,13 +1028,23 @@ return Filename + Pos; } -ASTFileSignature ASTWriter::createSignature(StringRef Bytes) { - // Calculate the hash till start of UNHASHED_CONTROL_BLOCK. +std::pair +ASTWriter::createSignature(StringRef AllBytes, StringRef ASTBlockBytes) { llvm::SHA1 Hasher; - Hasher.update(ArrayRef(Bytes.bytes_begin(), Bytes.size())); + Hasher.update(ASTBlockBytes); auto Hash = Hasher.result(); + ASTFileSignature ASTBlockHash = ASTFileSignature::create(Hash); + + // Add the remaining bytes (i.e. 
bytes before the unhashed control block that + // are not part of the AST block) + Hasher.update( + AllBytes.take_front(ASTBlockBytes.bytes_end() - AllBytes.bytes_begin())); + Hasher.update( + AllBytes.take_back(AllBytes.bytes_end() - ASTBlockBytes.bytes_end())); + Hash = Hasher.result(); + ASTFileSignature Signature = ASTFileSignature::create(Hash); - return ASTFileSignature::create(Hash); + return std::make_pair(ASTBlockHash, Signature); } ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP, @@ -1049,7 +1061,16 @@ ASTFileSignature Signature; if (WritingModule && PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) { - Signature = createSignature(StringRef(Buffer.begin(), StartOfUnhashedControl)); + ASTFileSignature ASTBlockHash; + auto ASTBlockStartByte = ASTBlockRange.first >> 3; + auto ASTBlockByteLength = (ASTBlockRange.second >> 3) - ASTBlockStartByte; + std::tie(ASTBlockHash, Signature) = createSignature( + StringRef(Buffer.begin(), StartOfUnhashedControl), + StringRef(Buffer.begin() + ASTBlockStartByte, ASTBlockByteLength)); + + Record.append(ASTBlockHash.begin(), ASTBlockHash.end()); + Stream.EmitRecord(AST_BLOCK_HASH, Record); + Record.clear(); Record.append(Signature.begin(), Signature.end()); Stream.EmitRecord(SIGNATURE, Record); Record.clear(); @@ -2041,7 +2062,7 @@ RecordData::value_type Record[] = { SOURCE_LOCATION_OFFSETS, SLocEntryOffsets.size(), SourceMgr.getNextLocalOffset() - 1 /* skip dummy */, - SLocEntryOffsetsBase}; + SLocEntryOffsetsBase - ASTBlockRange.first}; Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record, bytes(SLocEntryOffsets)); } @@ -2320,7 +2341,7 @@ { RecordData::value_type Record[] = {MACRO_OFFSET, MacroOffsets.size(), FirstMacroID - NUM_PREDEF_MACRO_IDS, - MacroOffsetsBase}; + MacroOffsetsBase - ASTBlockRange.first}; Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets)); } } @@ -2834,7 +2855,7 @@ assert(Idx.getIndex() >= FirstTypeID && "Re-writing a type from a prior AST"); 
// Emit the type's representation. - uint64_t Offset = ASTTypeWriter(*this).write(T); + uint64_t Offset = ASTTypeWriter(*this).write(T) - ASTBlockRange.first; // Record the offset for this type. unsigned Index = Idx.getIndex() - FirstTypeID; @@ -4544,6 +4565,8 @@ WriteControlBlock(PP, Context, isysroot, OutputFile); // Write the remaining AST contents. + Stream.FlushToWord(); + ASTBlockRange.first = Stream.GetCurrentBitNo(); Stream.EnterSubblock(AST_BLOCK_ID, 5); // This is so that older clang versions, before the introduction @@ -4675,9 +4698,9 @@ // c++-base-specifiers-id:i32 // type-id:i32) // - // module-kind is the ModuleKind enum value. If it is MK_PrebuiltModule or - // MK_ExplicitModule, then the module-name is the module name. Otherwise, - // it is the module file name. + // module-kind is the ModuleKind enum value. If it is MK_PrebuiltModule, + // MK_ExplicitModule or MK_ImplicitModule, then the module-name is the + // module name. Otherwise, it is the module file name. auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(MODULE_OFFSET_MAP)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); @@ -4690,10 +4713,7 @@ endian::Writer LE(Out, little); LE.write(static_cast(M.Kind)); - StringRef Name = - M.Kind == MK_PrebuiltModule || M.Kind == MK_ExplicitModule - ? M.ModuleName - : M.FileName; + StringRef Name = M.isModule() ? M.ModuleName : M.FileName; LE.write(Name.size()); Out.write(Name.data(), Name.size()); @@ -4905,6 +4925,7 @@ NumStatements, NumMacros, NumLexicalDeclContexts, NumVisibleDeclContexts}; Stream.EmitRecord(STATISTICS, Record); Stream.ExitBlock(); + ASTBlockRange.second = Stream.GetCurrentBitNo(); // Write the module file extension blocks. 
for (const auto &ExtWriter : ModuleFileExtensionWriters) diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -2434,12 +2434,12 @@ SourceLocation Loc = D->getLocation(); unsigned Index = ID - FirstDeclID; if (DeclOffsets.size() == Index) - DeclOffsets.emplace_back(Loc, Offset); + DeclOffsets.emplace_back(Loc, Offset, ASTBlockRange.first); else if (DeclOffsets.size() < Index) { // FIXME: Can/should this happen? DeclOffsets.resize(Index+1); DeclOffsets[Index].setLocation(Loc); - DeclOffsets[Index].setBitOffset(Offset); + DeclOffsets[Index].setBitOffset(Offset, ASTBlockRange.first); } else { llvm_unreachable("declarations should be emitted in ID order"); } diff --git a/clang/test/Modules/ASTSignature.c b/clang/test/Modules/ASTSignature.c new file mode 100644 --- /dev/null +++ b/clang/test/Modules/ASTSignature.c @@ -0,0 +1,24 @@ +// RUN: rm -rf %t +// RUN: %clang_cc1 -iquote %S/Inputs/ASTHash/ -fsyntax-only -fmodules \ +// RUN: -fimplicit-module-maps -fmodules-strict-context-hash \ +// RUN: -fmodules-cache-path=%t -fdisable-module-hash %s +// RUN: cp %t/MyHeader2.pcm %t1.pcm +// RUN: rm -rf %t +// RUN: %clang_cc1 -iquote "/dev/null" -iquote %S/Inputs/ASTHash/ -fsyntax-only \ +// RUN: -fmodules -fimplicit-module-maps -fmodules-strict-context-hash \ +// RUN: -fmodules-cache-path=%t -fdisable-module-hash %s +// RUN: cp %t/MyHeader2.pcm %t2.pcm +// RUN: llvm-bcanalyzer --dump --disable-histogram %t1.pcm > %t1.dump +// RUN: llvm-bcanalyzer --dump --disable-histogram %t2.pcm > %t2.dump +// RUN: cat %t1.dump %t2.dump | FileCheck %s + +#include "my_header_2.h" + +my_int var = 42; + +// CHECK: [[AST_BLOCK_HASH:]] +// CHECK: [[SIGNATURE:]] +// CHECK: [[AST_BLOCK_HASH]] +// CHECK-NOT: [[SIGNATURE]] +// The modules built by this test are designed to yield the same AST. 
If this +// test fails, it means that the AST block has become non-relocatable. diff --git a/clang/test/Modules/Inputs/ASTHash/module.modulemap b/clang/test/Modules/Inputs/ASTHash/module.modulemap new file mode 100644 --- /dev/null +++ b/clang/test/Modules/Inputs/ASTHash/module.modulemap @@ -0,0 +1,8 @@ +module MyHeader1 { + header "my_header_1.h" +} + +module MyHeader2 { + header "my_header_2.h" + export * +} diff --git a/clang/test/Modules/Inputs/ASTHash/my_header_1.h b/clang/test/Modules/Inputs/ASTHash/my_header_1.h new file mode 100644 --- /dev/null +++ b/clang/test/Modules/Inputs/ASTHash/my_header_1.h @@ -0,0 +1 @@ +typedef int my_int; diff --git a/clang/test/Modules/Inputs/ASTHash/my_header_2.h b/clang/test/Modules/Inputs/ASTHash/my_header_2.h new file mode 100644 --- /dev/null +++ b/clang/test/Modules/Inputs/ASTHash/my_header_2.h @@ -0,0 +1,3 @@ +#include "my_header_1.h" + +extern my_int var;