diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -264,8 +264,8 @@ BitOffset.setBitOffset(Offset); } - uint64_t getBitOffset() const { - return BitOffset.getBitOffset(); + uint64_t getBitOffset(const uint64_t ASTBlockBitOffset) const { + return BitOffset.getBitOffset() + ASTBlockBitOffset; } }; @@ -394,6 +394,9 @@ /// Record code for the signature that identifiers this AST file. SIGNATURE = 1, + /// Record code for the signature of the AST block. + AST_SIGNATURE, + /// Record code for the diagnostic options table. DIAGNOSTIC_OPTIONS, diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -1290,6 +1290,7 @@ off_t ExpectedSize, time_t ExpectedModTime, ASTFileSignature ExpectedSignature, unsigned ClientLoadCapabilities); + ASTReadResult ReadControlBlock(ModuleFile &F, SmallVectorImpl &Loaded, const ModuleFile *ImportedBy, diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -27,6 +27,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" @@ -441,7 +442,9 @@ /// A list of the module file extension writers. std::vector> - ModuleFileExtensionWriters; + ModuleFileExtensionWriters; + + std::pair ASTBlockRange; /// Retrieve or create a submodule ID for this module. unsigned getSubmoduleID(Module *Mod); @@ -458,7 +461,8 @@ ASTContext &Context); /// Calculate hash of the pcm content. 
- static ASTFileSignature createSignature(StringRef Bytes); + static std::pair + createSignature(StringRef AllBytes, StringRef ASTBlockBytes); void WriteInputFiles(SourceManager &SourceMgr, HeaderSearchOptions &HSOpts, bool Modules); diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -168,6 +168,10 @@ /// and modification time to identify this particular file. ASTFileSignature Signature; + /// The signature of the AST block of the module file; this can be used to + /// unique module files based on AST contents. + ASTFileSignature ASTSignature; + /// Whether this module has been directly imported by the /// user. bool DirectlyImported = false; @@ -185,6 +189,9 @@ /// The global bit offset (or base) of this module uint64_t GlobalBitOffset = 0; + /// The bit offset of the AST block of this module + uint64_t ASTBlockBitOffset = 0; + /// The serialized bitstream data for this file. 
StringRef Data; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -2928,7 +2928,6 @@ ASTReader::ASTReadResult ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { BitstreamCursor &Stream = F.Stream; - if (llvm::Error Err = Stream.EnterSubBlock(AST_BLOCK_ID)) { Error(std::move(Err)); return Failure; @@ -3377,7 +3376,7 @@ F.SLocEntryOffsets = (const uint32_t *)Blob.data(); F.LocalNumSLocEntries = Record[0]; unsigned SLocSpaceSize = Record[1]; - F.SLocEntryOffsetsBase = Record[2]; + F.SLocEntryOffsetsBase = Record[2] + F.ASTBlockBitOffset; std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) = SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries, SLocSpaceSize); @@ -3696,7 +3695,7 @@ F.MacroOffsets = (const uint32_t *)Blob.data(); F.LocalNumMacros = Record[0]; unsigned LocalBaseMacroID = Record[1]; - F.MacroOffsetsBase = Record[2]; + F.MacroOffsetsBase = Record[2] + F.ASTBlockBitOffset; F.BaseMacroID = getTotalNumMacros(); if (F.LocalNumMacros > 0) { @@ -3837,17 +3836,18 @@ while (Data < DataEnd) { // FIXME: Looking up dependency modules by filename is horrible. Let's - // start fixing this with prebuilt and explicit modules and see how it - // goes... + // start fixing this with prebuilt, explicit and implicit modules and see + // how it goes... using namespace llvm::support; ModuleKind Kind = static_cast( endian::readNext(Data)); uint16_t Len = endian::readNext(Data); StringRef Name = StringRef((const char*)Data, Len); Data += Len; - ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule - ? ModuleMgr.lookupByModuleName(Name) - : ModuleMgr.lookupByFileName(Name)); + ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule || + Kind == MK_ImplicitModule + ? 
ModuleMgr.lookupByModuleName(Name) + : ModuleMgr.lookupByFileName(Name)); if (!OM) { std::string Msg = "SourceLocation remap refers to unknown module, cannot find "; @@ -4483,7 +4483,6 @@ off_t ExpectedSize, time_t ExpectedModTime, ASTFileSignature ExpectedSignature, unsigned ClientLoadCapabilities) { - ModuleFile *M; std::string ErrorStr; ModuleManager::AddModuleResult AddResult = ModuleMgr.addModule(FileName, Type, ImportLoc, ImportedBy, @@ -4552,6 +4551,7 @@ // This is used for compatibility with older PCH formats. bool HaveReadControlBlock = false; while (true) { + const uint64_t CurrentStreamOffset = Stream.GetCurrentBitNo(); Expected MaybeEntry = Stream.advance(); if (!MaybeEntry) { Error(MaybeEntry.takeError()); @@ -4606,6 +4606,7 @@ return VersionMismatch; } + F.ASTBlockBitOffset = CurrentStreamOffset; // Record that we've loaded this module. Loaded.push_back(ImportedModule(M, ImportedBy, ImportLoc)); ShouldFinalizePCM = true; @@ -4736,6 +4737,10 @@ if (F) std::copy(Record.begin(), Record.end(), F->Signature.data()); break; + case AST_SIGNATURE: + if (F) + std::copy(Record.begin(), Record.end(), F->ASTSignature.data()); + break; case DIAGNOSTIC_OPTIONS: { bool Complain = (ClientLoadCapabilities & ARR_OutOfDate) == 0; if (Listener && ValidateDiagnosticOptions && @@ -6348,7 +6353,8 @@ assert(I != GlobalTypeMap.end() && "Corrupted global type map"); ModuleFile *M = I->second; return RecordLocation( - M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset()); + M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset() + + M->ASTBlockBitOffset); } static llvm::Optional getTypeClassForCode(TypeCode code) { diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -2869,7 +2869,7 @@ const DeclOffset &DOffs = M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS]; Loc = TranslateSourceLocation(*M, DOffs.getLocation()); - 
return RecordLocation(M, DOffs.getBitOffset()); + return RecordLocation(M, DOffs.getBitOffset(M->ASTBlockBitOffset)); } ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t GlobalOffset) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -10,14 +10,12 @@ // //===----------------------------------------------------------------------===// -#include "clang/AST/OpenMPClause.h" -#include "clang/Serialization/ASTRecordWriter.h" #include "ASTCommon.h" #include "ASTReaderInternals.h" #include "MultiOnDiskHashTable.h" -#include "clang/AST/AbstractTypeWriter.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTUnresolvedSet.h" +#include "clang/AST/AbstractTypeWriter.h" #include "clang/AST/Attr.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" @@ -31,6 +29,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/LambdaCapture.h" #include "clang/AST/NestedNameSpecifier.h" +#include "clang/AST/OpenMPClause.h" #include "clang/AST/RawCommentList.h" #include "clang/AST/TemplateName.h" #include "clang/AST/Type.h" @@ -65,7 +64,9 @@ #include "clang/Sema/ObjCMethodList.h" #include "clang/Sema/Sema.h" #include "clang/Sema/Weak.h" +#include "clang/Serialization/ASTBitCodes.h" #include "clang/Serialization/ASTReader.h" +#include "clang/Serialization/ASTRecordWriter.h" #include "clang/Serialization/InMemoryModuleCache.h" #include "clang/Serialization/ModuleFile.h" #include "clang/Serialization/ModuleFileExtension.h" @@ -960,6 +961,7 @@ BLOCK(UNHASHED_CONTROL_BLOCK); RECORD(SIGNATURE); + RECORD(AST_SIGNATURE); RECORD(DIAGNOSTIC_OPTIONS); RECORD(DIAG_PRAGMA_MAPPINGS); @@ -1025,22 +1027,38 @@ return Filename + Pos; } -ASTFileSignature ASTWriter::createSignature(StringRef Bytes) { - // Calculate the hash till start of UNHASHED_CONTROL_BLOCK. 
+std::pair +ASTWriter::createSignature(StringRef AllBytes, StringRef ASTBlockBytes) { + // Calculate the hash of the AST block llvm::SHA1 Hasher; - Hasher.update(ArrayRef(Bytes.bytes_begin(), Bytes.size())); + Hasher.update(ASTBlockBytes); auto Hash = Hasher.result(); // Convert to an array [5*i32]. - ASTFileSignature Signature; auto LShift = [&](unsigned char Val, unsigned Shift) { return (uint32_t)Val << Shift; }; + + ASTFileSignature ASTSignature; + for (int I = 0; I != 5; ++I) + ASTSignature[I] = LShift(Hash[I * 4 + 0], 24) | + LShift(Hash[I * 4 + 1], 16) | LShift(Hash[I * 4 + 2], 8) | + LShift(Hash[I * 4 + 3], 0); + + // Add the remaining bytes (i.e. bytes before the unhashed control block that + // are not part of the AST block) + Hasher.update( + AllBytes.take_front(ASTBlockBytes.bytes_end() - AllBytes.bytes_begin())); + Hasher.update( + AllBytes.take_back(AllBytes.bytes_end() - ASTBlockBytes.bytes_end())); + + Hash = Hasher.result(); + ASTFileSignature Signature; for (int I = 0; I != 5; ++I) Signature[I] = LShift(Hash[I * 4 + 0], 24) | LShift(Hash[I * 4 + 1], 16) | LShift(Hash[I * 4 + 2], 8) | LShift(Hash[I * 4 + 3], 0); - return Signature; + return std::make_pair(ASTSignature, Signature); } ASTFileSignature ASTWriter::writeUnhashedControlBlock(Preprocessor &PP, @@ -1054,10 +1072,18 @@ Stream.EnterSubblock(UNHASHED_CONTROL_BLOCK_ID, 5); // For implicit modules, write the hash of the PCM as its signature. 
+ ASTFileSignature ASTSignature; ASTFileSignature Signature; + auto ASTBlockStartByte = ASTBlockRange.first >> 3; + auto ASTBlockByteLength = (ASTBlockRange.second >> 3) - ASTBlockStartByte; + std::tie(ASTSignature, Signature) = createSignature( + StringRef(Buffer.begin(), StartOfUnhashedControl), + StringRef(Buffer.begin() + ASTBlockStartByte, ASTBlockByteLength)); if (WritingModule && PP.getHeaderSearchInfo().getHeaderSearchOpts().ModulesHashContent) { - Signature = createSignature(StringRef(Buffer.begin(), StartOfUnhashedControl)); + Record.append(ASTSignature.begin(), ASTSignature.end()); + Stream.EmitRecord(AST_SIGNATURE, Record); + Record.clear(); Record.append(Signature.begin(), Signature.end()); Stream.EmitRecord(SIGNATURE, Record); Record.clear(); @@ -2049,7 +2075,7 @@ RecordData::value_type Record[] = { SOURCE_LOCATION_OFFSETS, SLocEntryOffsets.size(), SourceMgr.getNextLocalOffset() - 1 /* skip dummy */, - SLocEntryOffsetsBase}; + SLocEntryOffsetsBase - ASTBlockRange.first}; Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record, bytes(SLocEntryOffsets)); } @@ -2328,7 +2354,7 @@ { RecordData::value_type Record[] = {MACRO_OFFSET, MacroOffsets.size(), FirstMacroID - NUM_PREDEF_MACRO_IDS, - MacroOffsetsBase}; + MacroOffsetsBase - ASTBlockRange.first}; Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets)); } } @@ -2842,7 +2868,7 @@ assert(Idx.getIndex() >= FirstTypeID && "Re-writing a type from a prior AST"); // Emit the type's representation. - uint64_t Offset = ASTTypeWriter(*this).write(T); + uint64_t Offset = ASTTypeWriter(*this).write(T) - ASTBlockRange.first; // Record the offset for this type. unsigned Index = Idx.getIndex() - FirstTypeID; @@ -4552,6 +4578,8 @@ WriteControlBlock(PP, Context, isysroot, OutputFile); // Write the remaining AST contents. 
+ Stream.FlushToWord(); + ASTBlockRange.first = Stream.GetCurrentBitNo(); Stream.EnterSubblock(AST_BLOCK_ID, 5); // This is so that older clang versions, before the introduction @@ -4683,9 +4711,9 @@ // c++-base-specifiers-id:i32 // type-id:i32) // - // module-kind is the ModuleKind enum value. If it is MK_PrebuiltModule or - // MK_ExplicitModule, then the module-name is the module name. Otherwise, - // it is the module file name. + // module-kind is the ModuleKind enum value. If it is MK_PrebuiltModule, + // MK_ExplicitModule or MK_ImplicitModule, then the module-name is the + // module name. Otherwise, it is the module file name. auto Abbrev = std::make_shared(); Abbrev->Add(BitCodeAbbrevOp(MODULE_OFFSET_MAP)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); @@ -4698,10 +4726,11 @@ endian::Writer LE(Out, little); LE.write(static_cast(M.Kind)); - StringRef Name = - M.Kind == MK_PrebuiltModule || M.Kind == MK_ExplicitModule - ? M.ModuleName - : M.FileName; + StringRef Name = M.Kind == MK_PrebuiltModule || + M.Kind == MK_ExplicitModule || + M.Kind == MK_ImplicitModule + ? M.ModuleName + : M.FileName; LE.write(Name.size()); Out.write(Name.data(), Name.size()); @@ -4913,6 +4942,7 @@ NumStatements, NumMacros, NumLexicalDeclContexts, NumVisibleDeclContexts}; Stream.EmitRecord(STATISTICS, Record); Stream.ExitBlock(); + ASTBlockRange.second = Stream.GetCurrentBitNo(); // Write the module file extension blocks. for (const auto &ExtWriter : ModuleFileExtensionWriters) diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -2428,7 +2428,7 @@ W.Visit(D); // Emit this declaration to the bitstream. 
- uint64_t Offset = W.Emit(D); + uint64_t Offset = W.Emit(D) - ASTBlockRange.first; // Record the offset for this declaration SourceLocation Loc = D->getLocation(); diff --git a/clang/test/Modules/ASTSignature.c b/clang/test/Modules/ASTSignature.c new file mode 100644 --- /dev/null +++ b/clang/test/Modules/ASTSignature.c @@ -0,0 +1,21 @@ +// RUN: rm -rf %t +// RUN: %clang_cc1 -iquote %S/Inputs/ASTHash/ -fsyntax-only -fmodules \ +// RUN: -fimplicit-module-maps -fmodules-strict-context-hash \ +// RUN: -fmodules-cache-path=%t %s -Rmodule-build 2> %t1 +// RUN: %clang_cc1 -iquote "/dev/null" -iquote %S/Inputs/ASTHash/ -fsyntax-only \ +// RUN: -fmodules -fimplicit-module-maps -fmodules-strict-context-hash \ +// RUN: -fmodules-cache-path=%t %s -Rmodule-build 2> %t2 +// RUN: sed -n "s/.* building module 'MyHeader2' as '\(.*\)' .*/\1/gp" %t1 \ +// RUN: | xargs llvm-bcanalyzer --dump --disable-histogram | cat > %t1.dump +// RUN: sed -n "s/.* building module 'MyHeader2' as '\(.*\)' .*/\1/gp" %t2 \ +// RUN: | xargs llvm-bcanalyzer --dump --disable-histogram | cat > %t2.dump +// RUN: cat %t1.dump %t2.dump | FileCheck %s + +#include "my_header_2.h" + +my_int var = 42; + +// CHECK: [[SIGNATURE:]] +// CHECK: [[SIGNATURE]] +// The modules built by this test are designed to yield the same AST. If this +// test fails, it means that the AST block has become non-relocatable. 
diff --git a/clang/test/Modules/Inputs/ASTHash/module.modulemap b/clang/test/Modules/Inputs/ASTHash/module.modulemap new file mode 100644 --- /dev/null +++ b/clang/test/Modules/Inputs/ASTHash/module.modulemap @@ -0,0 +1,8 @@ +module MyHeader1 { + header "my_header_1.h" +} + +module MyHeader2 { + header "my_header_2.h" + export * +} diff --git a/clang/test/Modules/Inputs/ASTHash/my_header_1.h b/clang/test/Modules/Inputs/ASTHash/my_header_1.h new file mode 100644 --- /dev/null +++ b/clang/test/Modules/Inputs/ASTHash/my_header_1.h @@ -0,0 +1 @@ +typedef int my_int; diff --git a/clang/test/Modules/Inputs/ASTHash/my_header_2.h b/clang/test/Modules/Inputs/ASTHash/my_header_2.h new file mode 100644 --- /dev/null +++ b/clang/test/Modules/Inputs/ASTHash/my_header_2.h @@ -0,0 +1,3 @@ +#include "my_header_1.h" + +extern my_int var;