diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -32,6 +32,7 @@ #include "clang/Serialization/ModuleFile.h" #include "clang/Serialization/ModuleFileExtension.h" #include "clang/Serialization/ModuleManager.h" +#include "clang/Serialization/SourceLocationEncoding.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -396,6 +397,8 @@ using ModuleReverseIterator = ModuleManager::ModuleReverseIterator; private: + using LocSeq = SourceLocationSequence; + /// The receiver of some callbacks invoked by ASTReader. std::unique_ptr Listener; @@ -2155,16 +2158,16 @@ /// Read a source location from raw form and return it in its /// originating module file's source location space. - SourceLocation - ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw) const { - return SourceLocation::getFromRawEncoding((Raw >> 1) | - (Raw << (8 * sizeof(Raw) - 1))); + SourceLocation ReadUntranslatedSourceLocation(SourceLocation::UIntTy Raw, + LocSeq *Seq = nullptr) const { + return SourceLocationEncoding::decode(Raw, Seq); } /// Read a source location from raw form. SourceLocation ReadSourceLocation(ModuleFile &ModuleFile, - SourceLocation::UIntTy Raw) const { - SourceLocation Loc = ReadUntranslatedSourceLocation(Raw); + SourceLocation::UIntTy Raw, + LocSeq *Seq = nullptr) const { + SourceLocation Loc = ReadUntranslatedSourceLocation(Raw, Seq); return TranslateSourceLocation(ModuleFile, Loc); } @@ -2184,14 +2187,14 @@ /// Read a source location. SourceLocation ReadSourceLocation(ModuleFile &ModuleFile, - const RecordDataImpl &Record, - unsigned &Idx) { - return ReadSourceLocation(ModuleFile, Record[Idx++]); + const RecordDataImpl &Record, unsigned &Idx, + LocSeq *Seq = nullptr) { + return ReadSourceLocation(ModuleFile, Record[Idx++], Seq); } /// Read a source range. 
- SourceRange ReadSourceRange(ModuleFile &F, - const RecordData &Record, unsigned &Idx); + SourceRange ReadSourceRange(ModuleFile &F, const RecordData &Record, + unsigned &Idx, LocSeq *Seq = nullptr); // Read a string static std::string ReadString(const RecordData &Record, unsigned &Idx); diff --git a/clang/include/clang/Serialization/ASTRecordReader.h b/clang/include/clang/Serialization/ASTRecordReader.h --- a/clang/include/clang/Serialization/ASTRecordReader.h +++ b/clang/include/clang/Serialization/ASTRecordReader.h @@ -18,6 +18,7 @@ #include "clang/AST/AbstractBasicReader.h" #include "clang/Lex/Token.h" #include "clang/Serialization/ASTReader.h" +#include "clang/Serialization/SourceLocationEncoding.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" @@ -30,6 +31,7 @@ class ASTRecordReader : public serialization::DataStreamBasicReader { using ModuleFile = serialization::ModuleFile; + using LocSeq = SourceLocationSequence; ASTReader *Reader; ModuleFile *F; @@ -160,8 +162,7 @@ TypeSourceInfo *readTypeSourceInfo(); /// Reads the location information for a type. - void readTypeLoc(TypeLoc TL); - + void readTypeLoc(TypeLoc TL, LocSeq *Seq = nullptr); /// Map a local type ID within a given AST file to a global type ID. serialization::TypeID getGlobalTypeID(unsigned LocalID) const { @@ -271,13 +272,13 @@ void readOMPChildren(OMPChildren *Data); /// Read a source location, advancing Idx. - SourceLocation readSourceLocation() { - return Reader->ReadSourceLocation(*F, Record, Idx); + SourceLocation readSourceLocation(LocSeq *Seq = nullptr) { + return Reader->ReadSourceLocation(*F, Record, Idx, Seq); } /// Read a source range, advancing Idx. - SourceRange readSourceRange() { - return Reader->ReadSourceRange(*F, Record, Idx); + SourceRange readSourceRange(LocSeq *Seq = nullptr) { + return Reader->ReadSourceRange(*F, Record, Idx, Seq); } /// Read an arbitrary constant value, advancing Idx. 
diff --git a/clang/include/clang/Serialization/ASTRecordWriter.h b/clang/include/clang/Serialization/ASTRecordWriter.h --- a/clang/include/clang/Serialization/ASTRecordWriter.h +++ b/clang/include/clang/Serialization/ASTRecordWriter.h @@ -17,6 +17,7 @@ #include "clang/AST/AbstractBasicWriter.h" #include "clang/AST/OpenMPClause.h" #include "clang/Serialization/ASTWriter.h" +#include "clang/Serialization/SourceLocationEncoding.h" namespace clang { @@ -25,6 +26,8 @@ /// An object for streaming information to a record. class ASTRecordWriter : public serialization::DataStreamBasicWriter { + using LocSeq = SourceLocationSequence; + ASTWriter *Writer; ASTWriter::RecordDataImpl *Record; @@ -131,16 +134,16 @@ void AddFunctionDefinition(const FunctionDecl *FD); /// Emit a source location. - void AddSourceLocation(SourceLocation Loc) { - return Writer->AddSourceLocation(Loc, *Record); + void AddSourceLocation(SourceLocation Loc, LocSeq *Seq = nullptr) { + return Writer->AddSourceLocation(Loc, *Record, Seq); } void writeSourceLocation(SourceLocation Loc) { AddSourceLocation(Loc); } /// Emit a source range. - void AddSourceRange(SourceRange Range) { - return Writer->AddSourceRange(Range, *Record); + void AddSourceRange(SourceRange Range, LocSeq *Seq = nullptr) { + return Writer->AddSourceRange(Range, *Record, Seq); } void writeBool(bool Value) { @@ -206,7 +209,7 @@ void AddTypeSourceInfo(TypeSourceInfo *TInfo); /// Emits source location information for a type. Does not emit the type. - void AddTypeLoc(TypeLoc TL); + void AddTypeLoc(TypeLoc TL, LocSeq *Seq = nullptr); /// Emits a template argument location info. 
void AddTemplateArgumentLocInfo(TemplateArgument::ArgKind Kind, diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -25,6 +25,7 @@ #include "clang/Serialization/ASTBitCodes.h" #include "clang/Serialization/ASTDeserializationListener.h" #include "clang/Serialization/PCHContainerOperations.h" +#include "clang/Serialization/SourceLocationEncoding.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -104,6 +105,8 @@ using TypeIdxMap = llvm::DenseMap; + using LocSeq = SourceLocationSequence; + /// The bitstream writer used to emit this precompiled header. llvm::BitstreamWriter &Stream; @@ -581,10 +584,12 @@ RecordDataImpl &Record); /// Emit a source location. - void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record); + void AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record, + LocSeq *Seq = nullptr); /// Emit a source range. - void AddSourceRange(SourceRange Range, RecordDataImpl &Record); + void AddSourceRange(SourceRange Range, RecordDataImpl &Record, + LocSeq *Seq = nullptr); /// Emit a reference to an identifier. void AddIdentifierRef(const IdentifierInfo *II, RecordDataImpl &Record); diff --git a/clang/include/clang/Serialization/SourceLocationEncoding.h b/clang/include/clang/Serialization/SourceLocationEncoding.h new file mode 100644 --- /dev/null +++ b/clang/include/clang/Serialization/SourceLocationEncoding.h @@ -0,0 +1,162 @@ +//===--- SourceLocationEncoding.h - Small serialized locations --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Source locations are stored pervasively in the AST, making up a third of +// the size of typical serialized files. Storing them efficiently is important. +// +// We use integers optimized by VBR-encoding, because: +// - when abbreviations cannot be used, VBR6 encoding is our only choice +// - in the worst case a SourceLocation can be ~any 32-bit number, but in +// practice they are highly predictable +// +// We encode the integer so that likely values encode as small numbers that +// turn into few VBR chunks: +// - the invalid sentinel location is a very common value: it encodes as 0 +// - the "macro or not" bit is stored at the bottom of the integer +// (rather than at the top, as in memory), so macro locations can have +// small representations. +// - related locations (e.g. of a left and right paren pair) are usually +// similar, so when encoding a sequence of locations we store only +// differences between successive elements. +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/SourceLocation.h" + +#ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H +#define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H + +namespace clang { +class SourceLocationSequence; + +/// Serialized encoding of SourceLocations without context. +/// Optimized to have small unsigned values (=> small after VBR encoding). +/// +// Macro locations have the top bit set, we rotate by one so it is the low bit. 
+class SourceLocationEncoding { + using UIntTy = SourceLocation::UIntTy; + constexpr static unsigned UIntBits = CHAR_BIT * sizeof(UIntTy); + + static UIntTy encodeRaw(UIntTy Raw) { + return (Raw << 1) | (Raw >> (UIntBits - 1)); + } + static UIntTy decodeRaw(UIntTy Raw) { + return (Raw >> 1) | (Raw << (UIntBits - 1)); + } + friend SourceLocationSequence; + +public: + static uint64_t encode(SourceLocation Loc, + SourceLocationSequence * = nullptr); + static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr); +}; + +/// Serialized encoding of a sequence of SourceLocations. +/// +/// Optimized to produce small values when locations within the sequence are +/// similar. Each element can be delta-encoded against the last nonzero element. +/// +/// Sequences should be started by creating a SourceLocationSequence::State, +/// and then passed around as SourceLocationSequence*. Example: +/// +/// // establishes a sequence +/// void EmitTopLevelThing() { +/// SourceLocationSequence::State Seq; +/// EmitContainedThing(Seq); +/// EmitRecursiveThing(Seq); +/// } +/// +/// // optionally part of a sequence +/// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) { +/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq)); +/// } +/// +/// // establishes a sequence if there isn't one already +/// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) { +/// SourceLocationSequence::State Seq(ParentSeq); +/// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq)); +/// EmitRecursiveThing(Seq); +/// } +/// +class SourceLocationSequence { + using UIntTy = SourceLocation::UIntTy; + using EncodedTy = uint64_t; + constexpr static auto UIntBits = SourceLocationEncoding::UIntBits; + static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!"); + + // Prev stores the rotated last nonzero location. + UIntTy &Prev; + + // Zig-zag encoding turns small signed integers into small unsigned integers. 
+ // 0 => 0, -1 => 1, 1 => 2, -2 => 3, ... + static UIntTy zigZag(UIntTy V) { + UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0); + return Sign ^ (V << 1); + } + static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); } + + SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {} + + EncodedTy encodeRaw(UIntTy Raw) { + if (Raw == 0) + return 0; + UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw); + if (Prev == 0) + return Prev = Rotated; + UIntTy Delta = Rotated - Prev; + Prev = Rotated; + // Exactly one 33 bit value is possible! (1 << 32). + // This is because we have two representations of zero: trivial & relative. + return 1 + EncodedTy{zigZag(Delta)}; + } + UIntTy decodeRaw(EncodedTy Encoded) { + if (Encoded == 0) + return 0; + if (Prev == 0) + return SourceLocationEncoding::decodeRaw(Prev = Encoded); + return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1)); + } + +public: + SourceLocation decode(EncodedTy Encoded) { + return SourceLocation::getFromRawEncoding(decodeRaw(Encoded)); + } + EncodedTy encode(SourceLocation Loc) { + return encodeRaw(Loc.getRawEncoding()); + } + + class State; +}; + +/// This object establishes a SourceLocationSequence. +class SourceLocationSequence::State { + UIntTy Prev = 0; + SourceLocationSequence Seq; + +public: + // If Parent is provided and non-null, then this root becomes part of that + // enclosing sequence instead of establishing a new one. + State(SourceLocationSequence *Parent = nullptr) + : Seq(Parent ? Parent->Prev : Prev) {} + + // Implicit conversion for uniform use of roots vs propagated sequences. + operator SourceLocationSequence *() { return &Seq; } +}; + +inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc, + SourceLocationSequence *Seq) { + return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding()); +} +inline SourceLocation +SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) { + return Seq ? 
Seq->decode(Encoded) + : SourceLocation::getFromRawEncoding(decodeRaw(Encoded)); +} + +} // namespace clang +#endif diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -6466,11 +6466,13 @@ namespace clang { class TypeLocReader : public TypeLocVisitor { + using LocSeq = SourceLocationSequence; + ASTRecordReader &Reader; + LocSeq *Seq; - SourceLocation readSourceLocation() { - return Reader.readSourceLocation(); - } + SourceLocation readSourceLocation() { return Reader.readSourceLocation(Seq); } + SourceRange readSourceRange() { return Reader.readSourceRange(Seq); } TypeSourceInfo *GetTypeSourceInfo() { return Reader.readTypeSourceInfo(); @@ -6485,7 +6487,8 @@ } public: - TypeLocReader(ASTRecordReader &Reader) : Reader(Reader) {} + TypeLocReader(ASTRecordReader &Reader, LocSeq *Seq) + : Reader(Reader), Seq(Seq) {} // We want compile-time assurance that we've enumerated all of // these, so unfortunately we have to declare them first, then @@ -6582,7 +6585,7 @@ DependentAddressSpaceTypeLoc TL) { TL.setAttrNameLoc(readSourceLocation()); - TL.setAttrOperandParensRange(Reader.readSourceRange()); + TL.setAttrOperandParensRange(readSourceRange()); TL.setAttrExprOperand(Reader.readExpr()); } @@ -6606,7 +6609,7 @@ void TypeLocReader::VisitConstantMatrixTypeLoc(ConstantMatrixTypeLoc TL) { TL.setAttrNameLoc(readSourceLocation()); - TL.setAttrOperandParensRange(Reader.readSourceRange()); + TL.setAttrOperandParensRange(readSourceRange()); TL.setAttrRowOperand(Reader.readExpr()); TL.setAttrColumnOperand(Reader.readExpr()); } @@ -6614,7 +6617,7 @@ void TypeLocReader::VisitDependentSizedMatrixTypeLoc( DependentSizedMatrixTypeLoc TL) { TL.setAttrNameLoc(readSourceLocation()); - TL.setAttrOperandParensRange(Reader.readSourceRange()); + TL.setAttrOperandParensRange(readSourceRange()); TL.setAttrRowOperand(Reader.readExpr()); 
TL.setAttrColumnOperand(Reader.readExpr()); } @@ -6623,7 +6626,7 @@ TL.setLocalRangeBegin(readSourceLocation()); TL.setLParenLoc(readSourceLocation()); TL.setRParenLoc(readSourceLocation()); - TL.setExceptionSpecRange(Reader.readSourceRange()); + TL.setExceptionSpecRange(readSourceRange()); TL.setLocalRangeEnd(readSourceLocation()); for (unsigned i = 0, e = TL.getNumParams(); i != e; ++i) { TL.setParam(i, Reader.readDeclAs()); @@ -6826,9 +6829,9 @@ TL.setNameLoc(readSourceLocation()); } - -void ASTRecordReader::readTypeLoc(TypeLoc TL) { - TypeLocReader TLR(*this); +void ASTRecordReader::readTypeLoc(TypeLoc TL, LocSeq *ParentSeq) { + LocSeq::State Seq(ParentSeq); + TypeLocReader TLR(*this, Seq); for (; !TL.isNull(); TL = TL.getNextTypeLoc()) TLR.Visit(TL); } @@ -8995,11 +8998,10 @@ return Builder.getWithLocInContext(Context); } -SourceRange -ASTReader::ReadSourceRange(ModuleFile &F, const RecordData &Record, - unsigned &Idx) { - SourceLocation beg = ReadSourceLocation(F, Record, Idx); - SourceLocation end = ReadSourceLocation(F, Record, Idx); +SourceRange ASTReader::ReadSourceRange(ModuleFile &F, const RecordData &Record, + unsigned &Idx, LocSeq *Seq) { + SourceLocation beg = ReadSourceLocation(F, Record, Idx, Seq); + SourceLocation end = ReadSourceLocation(F, Record, Idx, Seq); return SourceRange(beg, end); } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -245,10 +245,19 @@ }; class TypeLocWriter : public TypeLocVisitor { + using LocSeq = SourceLocationSequence; + ASTRecordWriter &Record; + LocSeq *Seq; + + void addSourceLocation(SourceLocation Loc) { + Record.AddSourceLocation(Loc, Seq); + } + void addSourceRange(SourceRange Range) { Record.AddSourceRange(Range, Seq); } public: - TypeLocWriter(ASTRecordWriter &Record) : Record(Record) {} + TypeLocWriter(ASTRecordWriter &Record, LocSeq *Seq) + : Record(Record), Seq(Seq) {} #define 
ABSTRACT_TYPELOC(CLASS, PARENT) #define TYPELOC(CLASS, PARENT) \ @@ -266,7 +275,7 @@ } void TypeLocWriter::VisitBuiltinTypeLoc(BuiltinTypeLoc TL) { - Record.AddSourceLocation(TL.getBuiltinLoc()); + addSourceLocation(TL.getBuiltinLoc()); if (TL.needsExtraLocalData()) { Record.push_back(TL.getWrittenTypeSpec()); Record.push_back(static_cast(TL.getWrittenSignSpec())); @@ -276,11 +285,11 @@ } void TypeLocWriter::VisitComplexTypeLoc(ComplexTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitPointerTypeLoc(PointerTypeLoc TL) { - Record.AddSourceLocation(TL.getStarLoc()); + addSourceLocation(TL.getStarLoc()); } void TypeLocWriter::VisitDecayedTypeLoc(DecayedTypeLoc TL) { @@ -292,25 +301,25 @@ } void TypeLocWriter::VisitBlockPointerTypeLoc(BlockPointerTypeLoc TL) { - Record.AddSourceLocation(TL.getCaretLoc()); + addSourceLocation(TL.getCaretLoc()); } void TypeLocWriter::VisitLValueReferenceTypeLoc(LValueReferenceTypeLoc TL) { - Record.AddSourceLocation(TL.getAmpLoc()); + addSourceLocation(TL.getAmpLoc()); } void TypeLocWriter::VisitRValueReferenceTypeLoc(RValueReferenceTypeLoc TL) { - Record.AddSourceLocation(TL.getAmpAmpLoc()); + addSourceLocation(TL.getAmpAmpLoc()); } void TypeLocWriter::VisitMemberPointerTypeLoc(MemberPointerTypeLoc TL) { - Record.AddSourceLocation(TL.getStarLoc()); + addSourceLocation(TL.getStarLoc()); Record.AddTypeSourceInfo(TL.getClassTInfo()); } void TypeLocWriter::VisitArrayTypeLoc(ArrayTypeLoc TL) { - Record.AddSourceLocation(TL.getLBracketLoc()); - Record.AddSourceLocation(TL.getRBracketLoc()); + addSourceLocation(TL.getLBracketLoc()); + addSourceLocation(TL.getRBracketLoc()); Record.push_back(TL.getSizeExpr() ? 
1 : 0); if (TL.getSizeExpr()) Record.AddStmt(TL.getSizeExpr()); @@ -335,56 +344,56 @@ void TypeLocWriter::VisitDependentAddressSpaceTypeLoc( DependentAddressSpaceTypeLoc TL) { - Record.AddSourceLocation(TL.getAttrNameLoc()); + addSourceLocation(TL.getAttrNameLoc()); SourceRange range = TL.getAttrOperandParensRange(); - Record.AddSourceLocation(range.getBegin()); - Record.AddSourceLocation(range.getEnd()); + addSourceLocation(range.getBegin()); + addSourceLocation(range.getEnd()); Record.AddStmt(TL.getAttrExprOperand()); } void TypeLocWriter::VisitDependentSizedExtVectorTypeLoc( DependentSizedExtVectorTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitVectorTypeLoc(VectorTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitDependentVectorTypeLoc( DependentVectorTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitExtVectorTypeLoc(ExtVectorTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitConstantMatrixTypeLoc(ConstantMatrixTypeLoc TL) { - Record.AddSourceLocation(TL.getAttrNameLoc()); + addSourceLocation(TL.getAttrNameLoc()); SourceRange range = TL.getAttrOperandParensRange(); - Record.AddSourceLocation(range.getBegin()); - Record.AddSourceLocation(range.getEnd()); + addSourceLocation(range.getBegin()); + addSourceLocation(range.getEnd()); Record.AddStmt(TL.getAttrRowOperand()); Record.AddStmt(TL.getAttrColumnOperand()); } void TypeLocWriter::VisitDependentSizedMatrixTypeLoc( DependentSizedMatrixTypeLoc TL) { - Record.AddSourceLocation(TL.getAttrNameLoc()); + addSourceLocation(TL.getAttrNameLoc()); SourceRange range = TL.getAttrOperandParensRange(); - Record.AddSourceLocation(range.getBegin()); - Record.AddSourceLocation(range.getEnd()); + addSourceLocation(range.getBegin()); + 
addSourceLocation(range.getEnd()); Record.AddStmt(TL.getAttrRowOperand()); Record.AddStmt(TL.getAttrColumnOperand()); } void TypeLocWriter::VisitFunctionTypeLoc(FunctionTypeLoc TL) { - Record.AddSourceLocation(TL.getLocalRangeBegin()); - Record.AddSourceLocation(TL.getLParenLoc()); - Record.AddSourceLocation(TL.getRParenLoc()); - Record.AddSourceRange(TL.getExceptionSpecRange()); - Record.AddSourceLocation(TL.getLocalRangeEnd()); + addSourceLocation(TL.getLocalRangeBegin()); + addSourceLocation(TL.getLParenLoc()); + addSourceLocation(TL.getRParenLoc()); + addSourceRange(TL.getExceptionSpecRange()); + addSourceLocation(TL.getLocalRangeEnd()); for (unsigned i = 0, e = TL.getNumParams(); i != e; ++i) Record.AddDeclRef(TL.getParam(i)); } @@ -398,81 +407,81 @@ } void TypeLocWriter::VisitUnresolvedUsingTypeLoc(UnresolvedUsingTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitUsingTypeLoc(UsingTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitTypedefTypeLoc(TypedefTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitObjCTypeParamTypeLoc(ObjCTypeParamTypeLoc TL) { if (TL.getNumProtocols()) { - Record.AddSourceLocation(TL.getProtocolLAngleLoc()); - Record.AddSourceLocation(TL.getProtocolRAngleLoc()); + addSourceLocation(TL.getProtocolLAngleLoc()); + addSourceLocation(TL.getProtocolRAngleLoc()); } for (unsigned i = 0, e = TL.getNumProtocols(); i != e; ++i) - Record.AddSourceLocation(TL.getProtocolLoc(i)); + addSourceLocation(TL.getProtocolLoc(i)); } void TypeLocWriter::VisitTypeOfExprTypeLoc(TypeOfExprTypeLoc TL) { - Record.AddSourceLocation(TL.getTypeofLoc()); - Record.AddSourceLocation(TL.getLParenLoc()); - Record.AddSourceLocation(TL.getRParenLoc()); + addSourceLocation(TL.getTypeofLoc()); + addSourceLocation(TL.getLParenLoc()); + 
addSourceLocation(TL.getRParenLoc()); } void TypeLocWriter::VisitTypeOfTypeLoc(TypeOfTypeLoc TL) { - Record.AddSourceLocation(TL.getTypeofLoc()); - Record.AddSourceLocation(TL.getLParenLoc()); - Record.AddSourceLocation(TL.getRParenLoc()); + addSourceLocation(TL.getTypeofLoc()); + addSourceLocation(TL.getLParenLoc()); + addSourceLocation(TL.getRParenLoc()); Record.AddTypeSourceInfo(TL.getUnderlyingTInfo()); } void TypeLocWriter::VisitDecltypeTypeLoc(DecltypeTypeLoc TL) { - Record.AddSourceLocation(TL.getDecltypeLoc()); - Record.AddSourceLocation(TL.getRParenLoc()); + addSourceLocation(TL.getDecltypeLoc()); + addSourceLocation(TL.getRParenLoc()); } void TypeLocWriter::VisitUnaryTransformTypeLoc(UnaryTransformTypeLoc TL) { - Record.AddSourceLocation(TL.getKWLoc()); - Record.AddSourceLocation(TL.getLParenLoc()); - Record.AddSourceLocation(TL.getRParenLoc()); + addSourceLocation(TL.getKWLoc()); + addSourceLocation(TL.getLParenLoc()); + addSourceLocation(TL.getRParenLoc()); Record.AddTypeSourceInfo(TL.getUnderlyingTInfo()); } void TypeLocWriter::VisitAutoTypeLoc(AutoTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); Record.push_back(TL.isConstrained()); if (TL.isConstrained()) { Record.AddNestedNameSpecifierLoc(TL.getNestedNameSpecifierLoc()); - Record.AddSourceLocation(TL.getTemplateKWLoc()); - Record.AddSourceLocation(TL.getConceptNameLoc()); + addSourceLocation(TL.getTemplateKWLoc()); + addSourceLocation(TL.getConceptNameLoc()); Record.AddDeclRef(TL.getFoundDecl()); - Record.AddSourceLocation(TL.getLAngleLoc()); - Record.AddSourceLocation(TL.getRAngleLoc()); + addSourceLocation(TL.getLAngleLoc()); + addSourceLocation(TL.getRAngleLoc()); for (unsigned I = 0; I < TL.getNumArgs(); ++I) Record.AddTemplateArgumentLocInfo(TL.getTypePtr()->getArg(I).getKind(), TL.getArgLocInfo(I)); } Record.push_back(TL.isDecltypeAuto()); if (TL.isDecltypeAuto()) - Record.AddSourceLocation(TL.getRParenLoc()); + 
addSourceLocation(TL.getRParenLoc()); } void TypeLocWriter::VisitDeducedTemplateSpecializationTypeLoc( DeducedTemplateSpecializationTypeLoc TL) { - Record.AddSourceLocation(TL.getTemplateNameLoc()); + addSourceLocation(TL.getTemplateNameLoc()); } void TypeLocWriter::VisitRecordTypeLoc(RecordTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitEnumTypeLoc(EnumTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitAttributedTypeLoc(AttributedTypeLoc TL) { @@ -484,107 +493,107 @@ } void TypeLocWriter::VisitTemplateTypeParmTypeLoc(TemplateTypeParmTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitSubstTemplateTypeParmTypeLoc( SubstTemplateTypeParmTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitSubstTemplateTypeParmPackTypeLoc( SubstTemplateTypeParmPackTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitTemplateSpecializationTypeLoc( TemplateSpecializationTypeLoc TL) { - Record.AddSourceLocation(TL.getTemplateKeywordLoc()); - Record.AddSourceLocation(TL.getTemplateNameLoc()); - Record.AddSourceLocation(TL.getLAngleLoc()); - Record.AddSourceLocation(TL.getRAngleLoc()); + addSourceLocation(TL.getTemplateKeywordLoc()); + addSourceLocation(TL.getTemplateNameLoc()); + addSourceLocation(TL.getLAngleLoc()); + addSourceLocation(TL.getRAngleLoc()); for (unsigned i = 0, e = TL.getNumArgs(); i != e; ++i) Record.AddTemplateArgumentLocInfo(TL.getArgLoc(i).getArgument().getKind(), TL.getArgLoc(i).getLocInfo()); } void TypeLocWriter::VisitParenTypeLoc(ParenTypeLoc TL) { - Record.AddSourceLocation(TL.getLParenLoc()); - Record.AddSourceLocation(TL.getRParenLoc()); + addSourceLocation(TL.getLParenLoc()); + 
addSourceLocation(TL.getRParenLoc()); } void TypeLocWriter::VisitMacroQualifiedTypeLoc(MacroQualifiedTypeLoc TL) { - Record.AddSourceLocation(TL.getExpansionLoc()); + addSourceLocation(TL.getExpansionLoc()); } void TypeLocWriter::VisitElaboratedTypeLoc(ElaboratedTypeLoc TL) { - Record.AddSourceLocation(TL.getElaboratedKeywordLoc()); + addSourceLocation(TL.getElaboratedKeywordLoc()); Record.AddNestedNameSpecifierLoc(TL.getQualifierLoc()); } void TypeLocWriter::VisitInjectedClassNameTypeLoc(InjectedClassNameTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitDependentNameTypeLoc(DependentNameTypeLoc TL) { - Record.AddSourceLocation(TL.getElaboratedKeywordLoc()); + addSourceLocation(TL.getElaboratedKeywordLoc()); Record.AddNestedNameSpecifierLoc(TL.getQualifierLoc()); - Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitDependentTemplateSpecializationTypeLoc( DependentTemplateSpecializationTypeLoc TL) { - Record.AddSourceLocation(TL.getElaboratedKeywordLoc()); + addSourceLocation(TL.getElaboratedKeywordLoc()); Record.AddNestedNameSpecifierLoc(TL.getQualifierLoc()); - Record.AddSourceLocation(TL.getTemplateKeywordLoc()); - Record.AddSourceLocation(TL.getTemplateNameLoc()); - Record.AddSourceLocation(TL.getLAngleLoc()); - Record.AddSourceLocation(TL.getRAngleLoc()); + addSourceLocation(TL.getTemplateKeywordLoc()); + addSourceLocation(TL.getTemplateNameLoc()); + addSourceLocation(TL.getLAngleLoc()); + addSourceLocation(TL.getRAngleLoc()); for (unsigned I = 0, E = TL.getNumArgs(); I != E; ++I) Record.AddTemplateArgumentLocInfo(TL.getArgLoc(I).getArgument().getKind(), TL.getArgLoc(I).getLocInfo()); } void TypeLocWriter::VisitPackExpansionTypeLoc(PackExpansionTypeLoc TL) { - Record.AddSourceLocation(TL.getEllipsisLoc()); + addSourceLocation(TL.getEllipsisLoc()); } void TypeLocWriter::VisitObjCInterfaceTypeLoc(ObjCInterfaceTypeLoc TL) { - 
Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitObjCObjectTypeLoc(ObjCObjectTypeLoc TL) { Record.push_back(TL.hasBaseTypeAsWritten()); - Record.AddSourceLocation(TL.getTypeArgsLAngleLoc()); - Record.AddSourceLocation(TL.getTypeArgsRAngleLoc()); + addSourceLocation(TL.getTypeArgsLAngleLoc()); + addSourceLocation(TL.getTypeArgsRAngleLoc()); for (unsigned i = 0, e = TL.getNumTypeArgs(); i != e; ++i) Record.AddTypeSourceInfo(TL.getTypeArgTInfo(i)); - Record.AddSourceLocation(TL.getProtocolLAngleLoc()); - Record.AddSourceLocation(TL.getProtocolRAngleLoc()); + addSourceLocation(TL.getProtocolLAngleLoc()); + addSourceLocation(TL.getProtocolRAngleLoc()); for (unsigned i = 0, e = TL.getNumProtocols(); i != e; ++i) - Record.AddSourceLocation(TL.getProtocolLoc(i)); + addSourceLocation(TL.getProtocolLoc(i)); } void TypeLocWriter::VisitObjCObjectPointerTypeLoc(ObjCObjectPointerTypeLoc TL) { - Record.AddSourceLocation(TL.getStarLoc()); + addSourceLocation(TL.getStarLoc()); } void TypeLocWriter::VisitAtomicTypeLoc(AtomicTypeLoc TL) { - Record.AddSourceLocation(TL.getKWLoc()); - Record.AddSourceLocation(TL.getLParenLoc()); - Record.AddSourceLocation(TL.getRParenLoc()); + addSourceLocation(TL.getKWLoc()); + addSourceLocation(TL.getLParenLoc()); + addSourceLocation(TL.getRParenLoc()); } void TypeLocWriter::VisitPipeTypeLoc(PipeTypeLoc TL) { - Record.AddSourceLocation(TL.getKWLoc()); + addSourceLocation(TL.getKWLoc()); } void TypeLocWriter::VisitBitIntTypeLoc(clang::BitIntTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); } void TypeLocWriter::VisitDependentBitIntTypeLoc( clang::DependentBitIntTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); + addSourceLocation(TL.getNameLoc()); } void ASTWriter::WriteTypeAbbrevs() { @@ -5214,14 +5223,15 @@ Record.push_back(Raw); } -void ASTWriter::AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record) { - SourceLocation::UIntTy 
Raw = Loc.getRawEncoding(); - Record.push_back((Raw << 1) | (Raw >> (8 * sizeof(Raw) - 1))); +void ASTWriter::AddSourceLocation(SourceLocation Loc, RecordDataImpl &Record, + SourceLocationSequence *Seq) { + Record.push_back(SourceLocationEncoding::encode(Loc, Seq)); } -void ASTWriter::AddSourceRange(SourceRange Range, RecordDataImpl &Record) { - AddSourceLocation(Range.getBegin(), Record); - AddSourceLocation(Range.getEnd(), Record); +void ASTWriter::AddSourceRange(SourceRange Range, RecordDataImpl &Record, + SourceLocationSequence *Seq) { + AddSourceLocation(Range.getBegin(), Record, Seq); + AddSourceLocation(Range.getEnd(), Record, Seq); } void ASTRecordWriter::AddAPFloat(const llvm::APFloat &Value) { @@ -5348,8 +5358,9 @@ AddTypeLoc(TInfo->getTypeLoc()); } -void ASTRecordWriter::AddTypeLoc(TypeLoc TL) { - TypeLocWriter TLW(*this); +void ASTRecordWriter::AddTypeLoc(TypeLoc TL, LocSeq *OuterSeq) { + LocSeq::State Seq(OuterSeq); + TypeLocWriter TLW(*this, Seq); for (; !TL.isNull(); TL = TL.getNextTypeLoc()) TLW.Visit(TL); } diff --git a/clang/unittests/Serialization/CMakeLists.txt b/clang/unittests/Serialization/CMakeLists.txt --- a/clang/unittests/Serialization/CMakeLists.txt +++ b/clang/unittests/Serialization/CMakeLists.txt @@ -7,6 +7,7 @@ add_clang_unittest(SerializationTests InMemoryModuleCacheTest.cpp ModuleCacheTest.cpp + SourceLocationEncodingTest.cpp ) clang_target_link_libraries(SerializationTests diff --git a/clang/unittests/Serialization/SourceLocationEncodingTest.cpp b/clang/unittests/Serialization/SourceLocationEncodingTest.cpp new file mode 100644 --- /dev/null +++ b/clang/unittests/Serialization/SourceLocationEncodingTest.cpp @@ -0,0 +1,103 @@ +//===- unittests/Serialization/SourceLocationEncodingTest.cpp -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Serialization/SourceLocationEncoding.h" + +#include "gtest/gtest.h" + +using namespace llvm; +using namespace clang; + +namespace { +using LocSeq = SourceLocationSequence; + +// Convert a single source location into encoded form and back. +// If ExpectedEncoded is provided, verify the encoded value too. +// Loc is the raw (in-memory) form of SourceLocation. +void roundTrip(SourceLocation::UIntTy Loc, + llvm::Optional ExpectedEncoded = llvm::None) { + uint64_t ActualEncoded = + SourceLocationEncoding::encode(SourceLocation::getFromRawEncoding(Loc)); + if (ExpectedEncoded) + ASSERT_EQ(ActualEncoded, *ExpectedEncoded) << "Encoding " << Loc; + SourceLocation::UIntTy DecodedEncoded = + SourceLocationEncoding::decode(ActualEncoded).getRawEncoding(); + ASSERT_EQ(DecodedEncoded, Loc) << "Decoding " << ActualEncoded; +} + +// As above, but use sequence encoding for a series of locations. 
+void roundTrip(std::vector Locs, + std::vector ExpectedEncoded = {}) { + std::vector ActualEncoded; + { + LocSeq::State Seq; + for (auto L : Locs) + ActualEncoded.push_back(SourceLocationEncoding::encode( + SourceLocation::getFromRawEncoding(L), Seq)); + if (!ExpectedEncoded.empty()) + ASSERT_EQ(ActualEncoded, ExpectedEncoded) + << "Encoding " << testing::PrintToString(Locs); + } + std::vector DecodedEncoded; + { + LocSeq::State Seq; + for (auto L : ActualEncoded) { + SourceLocation Loc = SourceLocationEncoding::decode(L, Seq); + DecodedEncoded.push_back(Loc.getRawEncoding()); + } + ASSERT_EQ(DecodedEncoded, Locs) + << "Decoding " << testing::PrintToString(ActualEncoded); + } +} + +constexpr SourceLocation::UIntTy MacroBit = + 1 << (sizeof(SourceLocation::UIntTy) * CHAR_BIT - 1); +constexpr SourceLocation::UIntTy Big = MacroBit >> 1; +constexpr SourceLocation::UIntTy Biggest = -1; + +TEST(SourceLocationEncoding, Individual) { + roundTrip(1, 2); + roundTrip(100, 200); + roundTrip(MacroBit, 1); + roundTrip(MacroBit | 5, 11); + roundTrip(Big); + roundTrip(Big + 1); + roundTrip(MacroBit | Big); + roundTrip(MacroBit | Big + 1); +} + +TEST(SourceLocationEncoding, Sequence) { + roundTrip({1, 2, 3, 3, 2, 1}, + {2, // 1 + 5, // +2 (+1 of non-raw) + 5, // +2 + 1, // +0 + 4, // -2 + 4} // -2 + ); + roundTrip({100, 0, 100}, + {200, // 100 + 0, // 0 + 1} // +0 + ); + + roundTrip({1, Big}, {2, ((Big - 1) << 2) + 1}); + roundTrip({2, MacroBit | Big}, {4, ((Big - 1) << 2) - 1}); + + roundTrip({3, MacroBit | 5, MacroBit | 4, 3}, + {6, // 3 + 11, // +5 (+2 of non-raw + set macro bit) + 4, // -2 + 6} // -3 (-2 of non-raw, clear macro bit) + ); + + roundTrip( + {123 | MacroBit, 1, 9, Biggest, Big, Big + 1, 0, MacroBit | Big, 0}); +} + +} // namespace