diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -41,7 +41,7 @@ /// Version 4 of AST files also requires that the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. - const unsigned VERSION_MAJOR = 9; + const unsigned VERSION_MAJOR = 10; /// AST file minor version number supported by this version of /// Clang. @@ -181,7 +181,7 @@ /// Raw source location of end of range. unsigned End; - /// Offset in the AST file. + /// Offset in the AST file relative to ModuleFile::MacroOffsetsBase. uint32_t BitOffset; PPEntityOffset(SourceRange R, uint32_t BitOffset) @@ -218,11 +218,14 @@ /// Source range/offset of a preprocessed entity. struct DeclOffset { - /// Raw source location. - unsigned Loc = 0; + /// Raw source location. The unsigned i.e. 32-bit integer is enough for + /// raw encoded location but it results in padding gap. This structure + /// is serialized as is to the AST file and undefined value in the + /// padding affects AST hash. Use uint64_t to avoid padding gap. + uint64_t Loc = 0; /// Offset in the AST file. - uint32_t BitOffset = 0; + uint64_t BitOffset = 0; DeclOffset() = default; DeclOffset(SourceLocation Loc, uint32_t BitOffset) diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -723,9 +723,10 @@ struct PendingMacroInfo { ModuleFile *M; - uint64_t MacroDirectivesOffset; + /// Offset relative to ModuleFile::MacroOffsetsBase. + uint32_t MacroDirectivesOffset; - PendingMacroInfo(ModuleFile *M, uint64_t MacroDirectivesOffset) + PendingMacroInfo(ModuleFile *M, uint32_t MacroDirectivesOffset) : M(M), MacroDirectivesOffset(MacroDirectivesOffset) {} }; @@ -2205,7 +2206,7 @@ /// \param MacroDirectivesOffset Offset of the serialized macro directive /// history. void addPendingMacro(IdentifierInfo *II, ModuleFile *M, - uint64_t MacroDirectivesOffset); + uint32_t MacroDirectivesOffset); /// Read the set of macros defined by this external macro source. void ReadDefinedMacros() override; diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -243,7 +243,7 @@ /// Offset of each type in the bitstream, indexed by /// the type's ID. - std::vector TypeOffsets; + std::vector TypeOffsets; /// The first ID number we can use for our own identifiers. serialization::IdentID FirstIdentID = serialization::NUM_PREDEF_IDENT_IDS; @@ -277,7 +277,8 @@ /// The macro infos to emit. std::vector MacroInfosToEmit; - llvm::DenseMap IdentMacroDirectivesOffsetMap; + llvm::DenseMap + IdentMacroDirectivesOffsetMap; /// @name FlushStmt Caches /// @{ @@ -464,7 +465,8 @@ const Preprocessor &PP); void WritePreprocessor(const Preprocessor &PP, bool IsModule); void WriteHeaderSearch(const HeaderSearch &HS); - void WritePreprocessorDetail(PreprocessingRecord &PPRec); + void WritePreprocessorDetail(PreprocessingRecord &PPRec, + uint64_t MacroOffsetsBase); void WriteSubmodules(Module *WritingModule); void WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, @@ -588,7 +590,7 @@ /// Determine the ID of an already-emitted macro. serialization::MacroID getMacroID(MacroInfo *MI); - uint64_t getMacroDirectivesOffset(const IdentifierInfo *Name); + uint32_t getMacroDirectivesOffset(const IdentifierInfo *Name); /// Emit a reference to a type. void AddTypeRef(QualType T, RecordDataImpl &Record); diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -251,6 +251,10 @@ /// The base offset in the source manager's view of this module. unsigned SLocEntryBaseOffset = 0; + /// Base file offset for the offsets in SLocEntryOffsets. Real file offset + /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i]. + uint64_t SLocEntryOffsetsBase = 0; + /// Offsets for all of the source location entries in the /// AST file. const uint32_t *SLocEntryOffsets = nullptr; @@ -302,6 +306,10 @@ /// The number of macros in this AST file. unsigned LocalNumMacros = 0; + /// Base file offset for the offsets in MacroOffsets. Real file offset for + /// the entry is MacroOffsetsBase + MacroOffsets[i]. + uint64_t MacroOffsetsBase = 0; + /// Offsets of macros in the preprocessor block. /// /// This array is indexed by the macro ID (-1), and provides @@ -450,7 +458,7 @@ /// Offset of each type within the bitstream, indexed by the /// type ID, or the representation of a Type*. - const uint32_t *TypeOffsets = nullptr; + const uint64_t *TypeOffsets = nullptr; /// Base type ID for types local to this module as represented in /// the global type ID space. diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1468,6 +1468,7 @@ ModuleFile *F = GlobalSLocEntryMap.find(-ID)->second; if (llvm::Error Err = F->SLocEntryCursor.JumpToBit( + F->SLocEntryOffsetsBase + F->SLocEntryOffsets[ID - F->SLocEntryBaseID])) { Error(std::move(Err)); return true; @@ -1930,9 +1931,8 @@ return HFI; } -void ASTReader::addPendingMacro(IdentifierInfo *II, - ModuleFile *M, - uint64_t MacroDirectivesOffset) { +void ASTReader::addPendingMacro(IdentifierInfo *II, ModuleFile *M, + uint32_t MacroDirectivesOffset) { assert(NumCurrentElementsDeserializing > 0 &&"Missing deserialization guard"); PendingMacroIDs[II].push_back(PendingMacroInfo(M, MacroDirectivesOffset)); } @@ -2097,7 +2097,8 @@ BitstreamCursor &Cursor = M.MacroCursor; SavedStreamPosition SavedPosition(Cursor); - if (llvm::Error Err = Cursor.JumpToBit(PMInfo.MacroDirectivesOffset)) { + if (llvm::Error Err = + Cursor.JumpToBit(M.MacroOffsetsBase + PMInfo.MacroDirectivesOffset)) { Error(std::move(Err)); return; } @@ -3096,7 +3097,7 @@ Error("duplicate TYPE_OFFSET record in AST file"); return Failure; } - F.TypeOffsets = (const uint32_t *)Blob.data(); + F.TypeOffsets = reinterpret_cast(Blob.data()); F.LocalNumTypes = Record[0]; unsigned LocalBaseTypeIndex = Record[1]; F.BaseTypeIndex = getTotalNumTypes(); @@ -3374,6 +3375,7 @@ F.SLocEntryOffsets = (const uint32_t *)Blob.data(); F.LocalNumSLocEntries = Record[0]; unsigned SLocSpaceSize = Record[1]; + F.SLocEntryOffsetsBase = Record[2]; std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) = SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries, SLocSpaceSize); @@ -3692,6 +3694,7 @@ F.MacroOffsets = (const uint32_t *)Blob.data(); F.LocalNumMacros = Record[0]; unsigned LocalBaseMacroID = Record[1]; + F.MacroOffsetsBase = Record[2]; F.BaseMacroID = getTotalNumMacros(); if (F.LocalNumMacros > 0) { @@ -5905,8 +5908,8 @@ } SavedStreamPosition SavedPosition(M.PreprocessorDetailCursor); - if (llvm::Error Err = - M.PreprocessorDetailCursor.JumpToBit(PPOffs.BitOffset)) { + if (llvm::Error Err = M.PreprocessorDetailCursor.JumpToBit( + M.MacroOffsetsBase + PPOffs.BitOffset)) { Error(std::move(Err)); return nullptr; } @@ -8416,7 +8419,8 @@ assert(I != GlobalMacroMap.end() && "Corrupted global macro map"); ModuleFile *M = I->second; unsigned Index = ID - M->BaseMacroID; - MacrosLoaded[ID] = ReadMacroRecord(*M, M->MacroOffsets[Index]); + MacrosLoaded[ID] = + ReadMacroRecord(*M, M->MacroOffsetsBase + M->MacroOffsets[Index]); if (DeserializationListener) DeserializationListener->MacroRead(ID + NUM_PREDEF_MACRO_IDS, diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1892,6 +1892,7 @@ // Write out the source location entry table. We skip the first // entry, which is always the same dummy entry. std::vector SLocEntryOffsets; + uint64_t SLocEntryOffsetsBase = Stream.GetCurrentBitNo(); RecordData PreloadSLocs; SLocEntryOffsets.reserve(SourceMgr.local_sloc_entry_size() - 1); for (unsigned I = 1, N = SourceMgr.local_sloc_entry_size(); @@ -1902,7 +1903,9 @@ assert(&SourceMgr.getSLocEntry(FID) == SLoc); // Record the offset of this source-location entry. - SLocEntryOffsets.push_back(Stream.GetCurrentBitNo()); + uint64_t Offset = Stream.GetCurrentBitNo() - SLocEntryOffsetsBase; + assert((Offset >> 32) == 0 && "SLocEntry offset too large"); + SLocEntryOffsets.push_back(Offset); // Figure out which record code to use. unsigned Code; @@ -2010,12 +2013,14 @@ Abbrev->Add(BitCodeAbbrevOp(SOURCE_LOCATION_OFFSETS)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // # of slocs Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // total size + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // base offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // offsets unsigned SLocOffsetsAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { RecordData::value_type Record[] = { SOURCE_LOCATION_OFFSETS, SLocEntryOffsets.size(), - SourceMgr.getNextLocalOffset() - 1 /* skip dummy */}; + SourceMgr.getNextLocalOffset() - 1 /* skip dummy */, + SLocEntryOffsetsBase}; Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record, bytes(SLocEntryOffsets)); } @@ -2092,9 +2097,11 @@ /// Writes the block containing the serialized form of the /// preprocessor. void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { + uint64_t MacroOffsetsBase = Stream.GetCurrentBitNo(); + PreprocessingRecord *PPRec = PP.getPreprocessingRecord(); if (PPRec) - WritePreprocessorDetail(*PPRec); + WritePreprocessorDetail(*PPRec, MacroOffsetsBase); RecordData Record; RecordData ModuleMacroRecord; @@ -2155,7 +2162,8 @@ // identifier they belong to. for (const IdentifierInfo *Name : MacroIdentifiers) { MacroDirective *MD = PP.getLocalMacroDirectiveHistory(Name); - auto StartOffset = Stream.GetCurrentBitNo(); + uint64_t StartOffset = Stream.GetCurrentBitNo() - MacroOffsetsBase; + assert((StartOffset >> 32) == 0 && "Macro identifiers offset too large"); // Emit the macro directives in reverse source order. for (; MD; MD = MD->getPrevious()) { @@ -2228,14 +2236,12 @@ // Record the local offset of this macro. unsigned Index = ID - FirstMacroID; - if (Index == MacroOffsets.size()) - MacroOffsets.push_back(Stream.GetCurrentBitNo()); - else { - if (Index > MacroOffsets.size()) - MacroOffsets.resize(Index + 1); + if (Index >= MacroOffsets.size()) + MacroOffsets.resize(Index + 1); - MacroOffsets[Index] = Stream.GetCurrentBitNo(); - } + uint64_t Offset = Stream.GetCurrentBitNo() - MacroOffsetsBase; + assert((Offset >> 32) == 0 && "Macro offset too large"); + MacroOffsets[Index] = Offset; AddIdentifierRef(Name, Record); AddSourceLocation(MI->getDefinitionLoc(), Record); @@ -2286,17 +2292,20 @@ Abbrev->Add(BitCodeAbbrevOp(MACRO_OFFSET)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of macros Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // base offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned MacroOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { RecordData::value_type Record[] = {MACRO_OFFSET, MacroOffsets.size(), - FirstMacroID - NUM_PREDEF_MACRO_IDS}; + FirstMacroID - NUM_PREDEF_MACRO_IDS, + MacroOffsetsBase}; Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets)); } } -void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec) { +void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec, + uint64_t MacroOffsetsBase) { if (PPRec.local_begin() == PPRec.local_end()) return; @@ -2333,8 +2342,10 @@ (void)++E, ++NumPreprocessingRecords, ++NextPreprocessorEntityID) { Record.clear(); + uint64_t Offset = Stream.GetCurrentBitNo() - MacroOffsetsBase; + assert((Offset >> 32) == 0 && "Preprocessed entity offset too large"); PreprocessedEntityOffsets.push_back( - PPEntityOffset((*E)->getSourceRange(), Stream.GetCurrentBitNo())); + PPEntityOffset((*E)->getSourceRange(), Offset)); if (auto *MD = dyn_cast(*E)) { // Record this macro definition's ID. @@ -5141,7 +5152,7 @@ return MacroIDs[MI]; } -uint64_t ASTWriter::getMacroDirectivesOffset(const IdentifierInfo *Name) { +uint32_t ASTWriter::getMacroDirectivesOffset(const IdentifierInfo *Name) { return IdentMacroDirectivesOffsetMap.lookup(Name); }