diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -41,7 +41,7 @@ /// Version 4 of AST files also requires that the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. - const unsigned VERSION_MAJOR = 9; + const unsigned VERSION_MAJOR = 10; /// AST file minor version number supported by this version of /// Clang. @@ -181,7 +181,7 @@ /// Raw source location of end of range. unsigned End; - /// Offset in the AST file. + /// Offset in the AST file relative to ModuleFile::MacroOffsetsBase. uint32_t BitOffset; PPEntityOffset(SourceRange R, uint32_t BitOffset) @@ -216,17 +216,41 @@ } }; - /// Source range/offset of a preprocessed entity. + /// Offset in the AST file. Use a 64-bit integer split into low/high + /// parts to keep the structure alignment 32-bit (this is important because + /// blobs in the bitstream are 32-bit aligned). This structure is serialized + /// "as is" to the AST file. + struct UnderalignedInt64 { + uint32_t BitOffsetLow = 0; + uint32_t BitOffsetHigh = 0; + + UnderalignedInt64() = default; + UnderalignedInt64(uint64_t BitOffset) { setBitOffset(BitOffset); } + + void setBitOffset(uint64_t Offset) { + BitOffsetLow = Offset; + BitOffsetHigh = Offset >> 32; + } + + uint64_t getBitOffset() const { + return BitOffsetLow | (uint64_t(BitOffsetHigh) << 32); + } + }; + + /// Source location and bit offset of a declaration. struct DeclOffset { /// Raw source location. unsigned Loc = 0; - /// Offset in the AST file. - uint32_t BitOffset = 0; + /// Offset in the AST file. Keep the structure alignment 32-bit and avoid a + /// padding gap, because an undefined value in the padding affects the AST hash. 
+ UnderalignedInt64 BitOffset; DeclOffset() = default; - DeclOffset(SourceLocation Loc, uint32_t BitOffset) - : Loc(Loc.getRawEncoding()), BitOffset(BitOffset) {} + DeclOffset(SourceLocation Loc, uint64_t BitOffset) { + setLocation(Loc); + setBitOffset(BitOffset); + } void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); @@ -235,6 +259,14 @@ SourceLocation getLocation() const { return SourceLocation::getFromRawEncoding(Loc); } + + void setBitOffset(uint64_t Offset) { + BitOffset.setBitOffset(Offset); + } + + uint64_t getBitOffset() const { + return BitOffset.getBitOffset(); + } }; /// The number of predefined preprocessed entity IDs. diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -723,9 +723,10 @@ struct PendingMacroInfo { ModuleFile *M; - uint64_t MacroDirectivesOffset; + /// Offset relative to ModuleFile::MacroOffsetsBase. + uint32_t MacroDirectivesOffset; - PendingMacroInfo(ModuleFile *M, uint64_t MacroDirectivesOffset) + PendingMacroInfo(ModuleFile *M, uint32_t MacroDirectivesOffset) : M(M), MacroDirectivesOffset(MacroDirectivesOffset) {} }; @@ -2205,7 +2206,7 @@ /// \param MacroDirectivesOffset Offset of the serialized macro directive /// history. void addPendingMacro(IdentifierInfo *II, ModuleFile *M, - uint64_t MacroDirectivesOffset); + uint32_t MacroDirectivesOffset); /// Read the set of macros defined by this external macro source. void ReadDefinedMacros() override; diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -243,7 +243,7 @@ /// Offset of each type in the bitstream, indexed by /// the type's ID. - std::vector TypeOffsets; + std::vector TypeOffsets; /// The first ID number we can use for our own identifiers. 
serialization::IdentID FirstIdentID = serialization::NUM_PREDEF_IDENT_IDS; @@ -277,7 +277,8 @@ /// The macro infos to emit. std::vector MacroInfosToEmit; - llvm::DenseMap IdentMacroDirectivesOffsetMap; + llvm::DenseMap + IdentMacroDirectivesOffsetMap; /// @name FlushStmt Caches /// @{ @@ -464,7 +465,8 @@ const Preprocessor &PP); void WritePreprocessor(const Preprocessor &PP, bool IsModule); void WriteHeaderSearch(const HeaderSearch &HS); - void WritePreprocessorDetail(PreprocessingRecord &PPRec); + void WritePreprocessorDetail(PreprocessingRecord &PPRec, + uint64_t MacroOffsetsBase); void WriteSubmodules(Module *WritingModule); void WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, @@ -588,7 +590,7 @@ /// Determine the ID of an already-emitted macro. serialization::MacroID getMacroID(MacroInfo *MI); - uint64_t getMacroDirectivesOffset(const IdentifierInfo *Name); + uint32_t getMacroDirectivesOffset(const IdentifierInfo *Name); /// Emit a reference to a type. void AddTypeRef(QualType T, RecordDataImpl &Record); diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -251,6 +251,10 @@ /// The base offset in the source manager's view of this module. unsigned SLocEntryBaseOffset = 0; + /// Base file offset for the offsets in SLocEntryOffsets. Real file offset + /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i]. + uint64_t SLocEntryOffsetsBase = 0; + /// Offsets for all of the source location entries in the /// AST file. const uint32_t *SLocEntryOffsets = nullptr; @@ -302,6 +306,10 @@ /// The number of macros in this AST file. unsigned LocalNumMacros = 0; + /// Base file offset for the offsets in MacroOffsets. Real file offset for + /// the entry is MacroOffsetsBase + MacroOffsets[i]. + uint64_t MacroOffsetsBase = 0; + /// Offsets of macros in the preprocessor block. 
/// /// This array is indexed by the macro ID (-1), and provides @@ -450,7 +458,7 @@ /// Offset of each type within the bitstream, indexed by the /// type ID, or the representation of a Type*. - const uint32_t *TypeOffsets = nullptr; + const UnderalignedInt64 *TypeOffsets = nullptr; /// Base type ID for types local to this module as represented in /// the global type ID space. diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1470,6 +1470,7 @@ ModuleFile *F = GlobalSLocEntryMap.find(-ID)->second; if (llvm::Error Err = F->SLocEntryCursor.JumpToBit( + F->SLocEntryOffsetsBase + F->SLocEntryOffsets[ID - F->SLocEntryBaseID])) { Error(std::move(Err)); return true; @@ -1932,9 +1933,8 @@ return HFI; } -void ASTReader::addPendingMacro(IdentifierInfo *II, - ModuleFile *M, - uint64_t MacroDirectivesOffset) { +void ASTReader::addPendingMacro(IdentifierInfo *II, ModuleFile *M, + uint32_t MacroDirectivesOffset) { assert(NumCurrentElementsDeserializing > 0 &&"Missing deserialization guard"); PendingMacroIDs[II].push_back(PendingMacroInfo(M, MacroDirectivesOffset)); } @@ -2099,7 +2099,8 @@ BitstreamCursor &Cursor = M.MacroCursor; SavedStreamPosition SavedPosition(Cursor); - if (llvm::Error Err = Cursor.JumpToBit(PMInfo.MacroDirectivesOffset)) { + if (llvm::Error Err = + Cursor.JumpToBit(M.MacroOffsetsBase + PMInfo.MacroDirectivesOffset)) { Error(std::move(Err)); return; } @@ -3098,7 +3099,7 @@ Error("duplicate TYPE_OFFSET record in AST file"); return Failure; } - F.TypeOffsets = (const uint32_t *)Blob.data(); + F.TypeOffsets = reinterpret_cast(Blob.data()); F.LocalNumTypes = Record[0]; unsigned LocalBaseTypeIndex = Record[1]; F.BaseTypeIndex = getTotalNumTypes(); @@ -3376,6 +3377,7 @@ F.SLocEntryOffsets = (const uint32_t *)Blob.data(); F.LocalNumSLocEntries = Record[0]; unsigned SLocSpaceSize = Record[1]; + F.SLocEntryOffsetsBase = Record[2]; 
std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) = SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries, SLocSpaceSize); @@ -3694,6 +3696,7 @@ F.MacroOffsets = (const uint32_t *)Blob.data(); F.LocalNumMacros = Record[0]; unsigned LocalBaseMacroID = Record[1]; + F.MacroOffsetsBase = Record[2]; F.BaseMacroID = getTotalNumMacros(); if (F.LocalNumMacros > 0) { @@ -5907,8 +5910,8 @@ } SavedStreamPosition SavedPosition(M.PreprocessorDetailCursor); - if (llvm::Error Err = - M.PreprocessorDetailCursor.JumpToBit(PPOffs.BitOffset)) { + if (llvm::Error Err = M.PreprocessorDetailCursor.JumpToBit( + M.MacroOffsetsBase + PPOffs.BitOffset)) { Error(std::move(Err)); return nullptr; } @@ -6321,7 +6324,8 @@ GlobalTypeMapType::iterator I = GlobalTypeMap.find(Index); assert(I != GlobalTypeMap.end() && "Corrupted global type map"); ModuleFile *M = I->second; - return RecordLocation(M, M->TypeOffsets[Index - M->BaseTypeIndex]); + return RecordLocation( + M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset()); } static llvm::Optional getTypeClassForCode(TypeCode code) { @@ -8427,7 +8431,8 @@ assert(I != GlobalMacroMap.end() && "Corrupted global macro map"); ModuleFile *M = I->second; unsigned Index = ID - M->BaseMacroID; - MacrosLoaded[ID] = ReadMacroRecord(*M, M->MacroOffsets[Index]); + MacrosLoaded[ID] = + ReadMacroRecord(*M, M->MacroOffsetsBase + M->MacroOffsets[Index]); if (DeserializationListener) DeserializationListener->MacroRead(ID + NUM_PREDEF_MACRO_IDS, diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -2870,7 +2870,7 @@ const DeclOffset &DOffs = M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS]; Loc = TranslateSourceLocation(*M, DOffs.getLocation()); - return RecordLocation(M, DOffs.BitOffset); + return RecordLocation(M, DOffs.getBitOffset()); } ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t 
GlobalOffset) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1893,6 +1893,7 @@ // Write out the source location entry table. We skip the first // entry, which is always the same dummy entry. std::vector SLocEntryOffsets; + uint64_t SLocEntryOffsetsBase = Stream.GetCurrentBitNo(); RecordData PreloadSLocs; SLocEntryOffsets.reserve(SourceMgr.local_sloc_entry_size() - 1); for (unsigned I = 1, N = SourceMgr.local_sloc_entry_size(); @@ -1903,7 +1904,9 @@ assert(&SourceMgr.getSLocEntry(FID) == SLoc); // Record the offset of this source-location entry. - SLocEntryOffsets.push_back(Stream.GetCurrentBitNo()); + uint64_t Offset = Stream.GetCurrentBitNo() - SLocEntryOffsetsBase; + assert((Offset >> 32) == 0 && "SLocEntry offset too large"); + SLocEntryOffsets.push_back(Offset); // Figure out which record code to use. unsigned Code; @@ -2011,12 +2014,14 @@ Abbrev->Add(BitCodeAbbrevOp(SOURCE_LOCATION_OFFSETS)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // # of slocs Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // total size + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // base offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // offsets unsigned SLocOffsetsAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { RecordData::value_type Record[] = { SOURCE_LOCATION_OFFSETS, SLocEntryOffsets.size(), - SourceMgr.getNextLocalOffset() - 1 /* skip dummy */}; + SourceMgr.getNextLocalOffset() - 1 /* skip dummy */, + SLocEntryOffsetsBase}; Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record, bytes(SLocEntryOffsets)); } @@ -2093,9 +2098,11 @@ /// Writes the block containing the serialized form of the /// preprocessor. 
void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { + uint64_t MacroOffsetsBase = Stream.GetCurrentBitNo(); + PreprocessingRecord *PPRec = PP.getPreprocessingRecord(); if (PPRec) - WritePreprocessorDetail(*PPRec); + WritePreprocessorDetail(*PPRec, MacroOffsetsBase); RecordData Record; RecordData ModuleMacroRecord; @@ -2156,7 +2163,8 @@ // identifier they belong to. for (const IdentifierInfo *Name : MacroIdentifiers) { MacroDirective *MD = PP.getLocalMacroDirectiveHistory(Name); - auto StartOffset = Stream.GetCurrentBitNo(); + uint64_t StartOffset = Stream.GetCurrentBitNo() - MacroOffsetsBase; + assert((StartOffset >> 32) == 0 && "Macro identifiers offset too large"); // Emit the macro directives in reverse source order. for (; MD; MD = MD->getPrevious()) { @@ -2229,14 +2237,12 @@ // Record the local offset of this macro. unsigned Index = ID - FirstMacroID; - if (Index == MacroOffsets.size()) - MacroOffsets.push_back(Stream.GetCurrentBitNo()); - else { - if (Index > MacroOffsets.size()) - MacroOffsets.resize(Index + 1); + if (Index >= MacroOffsets.size()) + MacroOffsets.resize(Index + 1); - MacroOffsets[Index] = Stream.GetCurrentBitNo(); - } + uint64_t Offset = Stream.GetCurrentBitNo() - MacroOffsetsBase; + assert((Offset >> 32) == 0 && "Macro offset too large"); + MacroOffsets[Index] = Offset; AddIdentifierRef(Name, Record); AddSourceLocation(MI->getDefinitionLoc(), Record); @@ -2287,17 +2293,20 @@ Abbrev->Add(BitCodeAbbrevOp(MACRO_OFFSET)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of macros Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // base offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned MacroOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { RecordData::value_type Record[] = {MACRO_OFFSET, MacroOffsets.size(), - FirstMacroID - NUM_PREDEF_MACRO_IDS}; + FirstMacroID - NUM_PREDEF_MACRO_IDS, + MacroOffsetsBase}; 
Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets)); } } -void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec) { +void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec, + uint64_t MacroOffsetsBase) { if (PPRec.local_begin() == PPRec.local_end()) return; @@ -2334,8 +2343,10 @@ (void)++E, ++NumPreprocessingRecords, ++NextPreprocessorEntityID) { Record.clear(); + uint64_t Offset = Stream.GetCurrentBitNo() - MacroOffsetsBase; + assert((Offset >> 32) == 0 && "Preprocessed entity offset too large"); PreprocessedEntityOffsets.push_back( - PPEntityOffset((*E)->getSourceRange(), Stream.GetCurrentBitNo())); + PPEntityOffset((*E)->getSourceRange(), Offset)); if (auto *MD = dyn_cast(*E)) { // Record this macro definition's ID. @@ -2808,10 +2819,10 @@ // Record the offset for this type. unsigned Index = Idx.getIndex() - FirstTypeID; if (TypeOffsets.size() == Index) - TypeOffsets.push_back(Offset); + TypeOffsets.emplace_back(Offset); else if (TypeOffsets.size() < Index) { TypeOffsets.resize(Index + 1); - TypeOffsets[Index] = Offset; + TypeOffsets[Index].setBitOffset(Offset); } else { llvm_unreachable("Types emitted in wrong order"); } @@ -5144,7 +5155,7 @@ return MacroIDs[MI]; } -uint64_t ASTWriter::getMacroDirectivesOffset(const IdentifierInfo *Name) { +uint32_t ASTWriter::getMacroDirectivesOffset(const IdentifierInfo *Name) { return IdentMacroDirectivesOffsetMap.lookup(Name); } diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -2434,12 +2434,12 @@ SourceLocation Loc = D->getLocation(); unsigned Index = ID - FirstDeclID; if (DeclOffsets.size() == Index) - DeclOffsets.push_back(DeclOffset(Loc, Offset)); + DeclOffsets.emplace_back(Loc, Offset); else if (DeclOffsets.size() < Index) { // FIXME: Can/should this happen? 
DeclOffsets.resize(Index+1); DeclOffsets[Index].setLocation(Loc); - DeclOffsets[Index].BitOffset = Offset; + DeclOffsets[Index].setBitOffset(Offset); } else { llvm_unreachable("declarations should be emitted in ID order"); }