Index: include/llvm/Support/SourceMgr.h
===================================================================
--- include/llvm/Support/SourceMgr.h
+++ include/llvm/Support/SourceMgr.h
@@ -18,6 +18,7 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/None.h"
+#include "llvm/ADT/PointerUnion.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
@@ -57,8 +58,38 @@
     /// The memory buffer for the file.
     std::unique_ptr<MemoryBuffer> Buffer;
 
+    /// Helper type for OffsetCache below: since we're storing many offsets
+    /// into relatively small files (often smaller than 2^8 or 2^16 bytes),
+    /// we select the offset vector element type dynamically based on the
+    /// size of Buffer.
+    using VariableSizeOffsets = PointerUnion4<std::vector<uint8_t> *,
+                                              std::vector<uint16_t> *,
+                                              std::vector<uint32_t> *,
+                                              std::vector<uint64_t> *>;
+
+    /// Vector of offsets into Buffer at which there are line-endings
+    /// (lazily populated). Once populated, the '\n' that marks the end of
+    /// line number N from [1..] is at Buffer[OffsetCache[N-1]]. Since
+    /// these offsets are in sorted (ascending) order, they can be
+    /// binary-searched for the first one after any given offset (eg. an
+    /// offset corresponding to a particular SMLoc).
+    mutable VariableSizeOffsets OffsetCache;
+
+    /// Populate \c OffsetCache and look up a given \p Ptr in it, assuming
+    /// it points somewhere into \c Buffer. The static type parameter \p T
+    /// must be an unsigned integer type from uint{8,16,32,64}_t large
+    /// enough to store offsets inside \c Buffer.
+    template <typename T>
+    unsigned getLineNumber(const char *Ptr) const;
+
     /// This is the location of the parent include, or null if at the top level.
     SMLoc IncludeLoc;
+
+    SrcBuffer() = default;
+    SrcBuffer(SrcBuffer &&);
+    SrcBuffer(const SrcBuffer &) = delete;
+    SrcBuffer &operator=(const SrcBuffer &) = delete;
+    ~SrcBuffer();
   };
 
   /// This is all of the buffers that we are reading from.
@@ -67,10 +98,6 @@
   // This is the list of directories we should search for include files in.
   std::vector<std::string> IncludeDirectories;
 
-  /// This is a cache for line number queries, its implementation is really
-  /// private to SourceMgr.cpp.
-  mutable void *LineNoCache = nullptr;
-
   DiagHandlerTy DiagHandler = nullptr;
   void *DiagContext = nullptr;
 
@@ -80,7 +107,7 @@
   SourceMgr() = default;
   SourceMgr(const SourceMgr &) = delete;
   SourceMgr &operator=(const SourceMgr &) = delete;
-  ~SourceMgr();
+  ~SourceMgr() = default;
 
   void setIncludeDirs(const std::vector<std::string> &Dirs) {
     IncludeDirectories = Dirs;
Index: lib/MC/MCParser/AsmParser.cpp
===================================================================
--- lib/MC/MCParser/AsmParser.cpp
+++ lib/MC/MCParser/AsmParser.cpp
@@ -168,14 +168,6 @@
   /// \brief List of forward directional labels for diagnosis at the end.
   SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels;
 
-  /// When generating dwarf for assembly source files we need to calculate the
-  /// logical line number based on the last parsed cpp hash file line comment
-  /// and current line. Since this is slow and messes up the SourceMgr's
-  /// cache we save the last info we queried with SrcMgr.FindLineNumber().
-  SMLoc LastQueryIDLoc;
-  unsigned LastQueryBuffer;
-  unsigned LastQueryLine;
-
   /// AssemblerDialect. ~OU means unset value and use value provided by MAI.
   unsigned AssemblerDialect = ~0U;
 
@@ -2189,20 +2181,8 @@
           0, StringRef(), CppHashInfo.Filename);
       getContext().setGenDwarfFileNumber(FileNumber);
 
-      // Since SrcMgr.FindLineNumber() is slow and messes up the SourceMgr's
-      // cache with the different Loc from the call above we save the last
-      // info we queried here with SrcMgr.FindLineNumber().
-      unsigned CppHashLocLineNo;
-      if (LastQueryIDLoc == CppHashInfo.Loc &&
-          LastQueryBuffer == CppHashInfo.Buf)
-        CppHashLocLineNo = LastQueryLine;
-      else {
-        CppHashLocLineNo =
-            SrcMgr.FindLineNumber(CppHashInfo.Loc, CppHashInfo.Buf);
-        LastQueryLine = CppHashLocLineNo;
-        LastQueryIDLoc = CppHashInfo.Loc;
-        LastQueryBuffer = CppHashInfo.Buf;
-      }
+      unsigned CppHashLocLineNo =
+        SrcMgr.FindLineNumber(CppHashInfo.Loc, CppHashInfo.Buf);
       Line = CppHashInfo.LineNumber - 1 + (Line - CppHashLocLineNo);
     }
 
Index: lib/Support/SourceMgr.cpp
===================================================================
--- lib/Support/SourceMgr.cpp
+++ lib/Support/SourceMgr.cpp
@@ -28,6 +28,7 @@
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
+#include <limits>
 #include <memory>
 #include <string>
 #include <utility>
@@ -36,24 +37,6 @@
 
 static const size_t TabStop = 8;
 
-namespace {
-
-  struct LineNoCacheTy {
-    const char *LastQuery;
-    unsigned LastQueryBufferID;
-    unsigned LineNoOfQuery;
-  };
-
-} // end anonymous namespace
-
-static LineNoCacheTy *getCache(void *Ptr) {
-  return (LineNoCacheTy*)Ptr;
-}
-
-SourceMgr::~SourceMgr() {
-  delete getCache(LineNoCache);
-}
-
 unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
                                    SMLoc IncludeLoc,
                                    std::string &IncludedFile) {
@@ -85,46 +68,86 @@
   return 0;
 }
 
-std::pair<unsigned, unsigned>
-SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
-  if (!BufferID)
-    BufferID = FindBufferContainingLoc(Loc);
-  assert(BufferID && "Invalid Location!");
+template <typename T>
+unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const {
+
+  // Ensure OffsetCache is allocated and populated with offsets of all the
+  // '\n' bytes.
+  std::vector<T> *Offsets = nullptr;
+  if (OffsetCache.isNull()) {
+    Offsets = new std::vector<T>();
+    OffsetCache = Offsets;
+    size_t Sz = Buffer->getBufferSize();
+    assert(Sz <= std::numeric_limits<T>::max());
+    StringRef S = Buffer->getBuffer();
+    for (size_t N = 0; N < Sz; ++N) {
+      if (S[N] == '\n') {
+        Offsets->push_back(static_cast<T>(N));
+      }
+    }
+  } else {
+    Offsets = OffsetCache.get<std::vector<T> *>();
+  }
 
-  const MemoryBuffer *Buff = getMemoryBuffer(BufferID);
+  const char *BufStart = Buffer->getBufferStart();
+  assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd());
+  ptrdiff_t PtrDiff = Ptr - BufStart;
+  assert(PtrDiff >= 0 && static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max());
+  T PtrOffset = static_cast<T>(PtrDiff);
 
-  // Count the number of \n's between the start of the file and the specified
-  // location.
-  unsigned LineNo = 1;
+  // std::lower_bound returns the first EOL offset that's not-less-than
+  // PtrOffset, meaning the EOL that _ends the line_ that PtrOffset is on
+  // (including if PtrOffset refers to the EOL itself). If there's no such
+  // EOL, returns end().
+  auto EOL = std::lower_bound(Offsets->begin(), Offsets->end(), PtrOffset);
 
-  const char *BufStart = Buff->getBufferStart();
-  const char *Ptr = BufStart;
+  // Lines count from 1, so add 1 to the distance from the 0th line.
+  return (1 + (EOL - Offsets->begin()));
+}
 
-  // If we have a line number cache, and if the query is to a later point in the
-  // same file, start searching from the last query location.  This optimizes
-  // for the case when multiple diagnostics come out of one file in order.
-  if (LineNoCacheTy *Cache = getCache(LineNoCache))
-    if (Cache->LastQueryBufferID == BufferID &&
-        Cache->LastQuery <= Loc.getPointer()) {
-      Ptr = Cache->LastQuery;
-      LineNo = Cache->LineNoOfQuery;
-    }
+SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other)
+  : Buffer(std::move(Other.Buffer)),
+    OffsetCache(Other.OffsetCache),
+    IncludeLoc(Other.IncludeLoc) {
+  Other.OffsetCache = nullptr;
+}
 
-  // Scan for the location being queried, keeping track of the number of lines
-  // we see.
-  for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr)
-    if (*Ptr == '\n') ++LineNo;
+SourceMgr::SrcBuffer::~SrcBuffer() {
+  if (!OffsetCache.isNull()) {
+    if (OffsetCache.is<std::vector<uint8_t>*>())
+      delete OffsetCache.get<std::vector<uint8_t>*>();
+    else if (OffsetCache.is<std::vector<uint16_t>*>())
+      delete OffsetCache.get<std::vector<uint16_t>*>();
+    else if (OffsetCache.is<std::vector<uint32_t>*>())
+      delete OffsetCache.get<std::vector<uint32_t>*>();
+    else
+      delete OffsetCache.get<std::vector<uint64_t>*>();
+    OffsetCache = nullptr;
+  }
+}
 
-  // Allocate the line number cache if it doesn't exist.
-  if (!LineNoCache)
-    LineNoCache = new LineNoCacheTy();
+std::pair<unsigned, unsigned>
+SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
+  if (!BufferID)
+    BufferID = FindBufferContainingLoc(Loc);
+  assert(BufferID && "Invalid Location!");
 
-  // Update the line # cache.
-  LineNoCacheTy &Cache = *getCache(LineNoCache);
-  Cache.LastQueryBufferID = BufferID;
-  Cache.LastQuery = Ptr;
-  Cache.LineNoOfQuery = LineNo;
-
+  auto &SB = getBufferInfo(BufferID);
+  const char *Ptr = Loc.getPointer();
+
+  size_t Sz = SB.Buffer->getBufferSize();
+  assert(Sz <= std::numeric_limits<unsigned>::max());
+  unsigned LineNo;
+  if (Sz <= std::numeric_limits<uint8_t>::max())
+    LineNo = SB.getLineNumber<uint8_t>(Ptr);
+  else if (Sz <= std::numeric_limits<uint16_t>::max())
+    LineNo = SB.getLineNumber<uint16_t>(Ptr);
+  else if (Sz <= std::numeric_limits<uint32_t>::max())
+    LineNo = SB.getLineNumber<uint32_t>(Ptr);
+  else
+    LineNo = SB.getLineNumber<uint64_t>(Ptr);
+
+  const char *BufStart = SB.Buffer->getBufferStart();
   size_t NewlineOffs = StringRef(BufStart, Ptr-BufStart).find_last_of("\n\r");
   if (NewlineOffs == StringRef::npos) NewlineOffs = ~(size_t)0;
   return std::make_pair(LineNo, Ptr-BufStart-NewlineOffs);
Index: unittests/Support/SourceMgrTest.cpp
===================================================================
--- unittests/Support/SourceMgrTest.cpp
+++ unittests/Support/SourceMgrTest.cpp
@@ -107,6 +107,320 @@
             Output);
 }
 
+TEST_F(SourceMgrTest, LocationAtEmptyBuffer) {
+  setMainBuffer("", "file.in");
+  printMessage(getLoc(0), SourceMgr::DK_Error, "message", None, None);
+
+  EXPECT_EQ("file.in:1:1: error: message\n"
+            "\n"
+            "^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationJustOnSoleNewline) {
+  setMainBuffer("\n", "file.in");
+  printMessage(getLoc(0), SourceMgr::DK_Error, "message", None, None);
+
+  EXPECT_EQ("file.in:1:1: error: message\n"
+            "\n"
+            "^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationJustAfterSoleNewline) {
+  setMainBuffer("\n", "file.in");
+  printMessage(getLoc(1), SourceMgr::DK_Error, "message", None, None);
+
+  EXPECT_EQ("file.in:2:1: error: message\n"
+            "\n"
+            "^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationJustAfterNonNewline) {
+  setMainBuffer("123", "file.in");
+  printMessage(getLoc(3), SourceMgr::DK_Error, "message", None, None);
+
+  EXPECT_EQ("file.in:1:4: error: message\n"
+            "123\n"
+            "   ^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationOnFirstLineOfMultiline) {
+  setMainBuffer("1234\n6789\n", "file.in");
+  printMessage(getLoc(3), SourceMgr::DK_Error, "message", None, None);
+
+  EXPECT_EQ("file.in:1:4: error: message\n"
+            "1234\n"
+            "   ^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationOnEOLOfFirstLineOfMultiline) {
+  setMainBuffer("1234\n6789\n", "file.in");
+  printMessage(getLoc(4), SourceMgr::DK_Error, "message", None, None);
+
+  EXPECT_EQ("file.in:1:5: error: message\n"
+            "1234\n"
+            "    ^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationOnSecondLineOfMultiline) {
+  setMainBuffer("1234\n6789\n", "file.in");
+  printMessage(getLoc(5), SourceMgr::DK_Error, "message", None, None);
+
+  EXPECT_EQ("file.in:2:1: error: message\n"
+            "6789\n"
+            "^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationOnSecondLineOfMultilineNoSecondEOL) {
+  setMainBuffer("1234\n6789", "file.in");
+  printMessage(getLoc(5), SourceMgr::DK_Error, "message", None, None);
+
+  EXPECT_EQ("file.in:2:1: error: message\n"
+            "6789\n"
+            "^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationOnEOLOfSecondSecondLineOfMultiline) {
+  setMainBuffer("1234\n6789\n", "file.in");
+  printMessage(getLoc(9), SourceMgr::DK_Error, "message", None, None);
+
+  EXPECT_EQ("file.in:2:5: error: message\n"
+            "6789\n"
+            "    ^\n",
+            Output);
+}
+
+#define STRING_LITERAL_253_BYTES \
+  "1234567890\n1234567890\n" \
+  "1234567890\n1234567890\n" \
+  "1234567890\n1234567890\n" \
+  "1234567890\n1234567890\n" \
+  "1234567890\n1234567890\n" \
+  "1234567890\n1234567890\n" \
+  "1234567890\n1234567890\n" \
+  "1234567890\n1234567890\n" \
+  "1234567890\n1234567890\n" \
+  "1234567890\n1234567890\n" \
+  "1234567890\n1234567890\n" \
+  "1234567890\n"
+
+//===----------------------------------------------------------------------===//
+// 255-byte buffer tests
+//===----------------------------------------------------------------------===//
+
+TEST_F(SourceMgrTest, LocationBeforeEndOf255ByteBuffer) {
+  setMainBuffer(STRING_LITERAL_253_BYTES   // first 253 bytes
+                "12"                       // + 2 = 255 bytes
+                , "file.in");
+  printMessage(getLoc(253), SourceMgr::DK_Error, "message", None, None);
+  EXPECT_EQ("file.in:24:1: error: message\n"
+            "12\n"
+            "^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationAtEndOf255ByteBuffer) {
+  setMainBuffer(STRING_LITERAL_253_BYTES   // first 253 bytes
+                "12"                       // + 2 = 255 bytes
+                , "file.in");
+  printMessage(getLoc(254), SourceMgr::DK_Error, "message", None, None);
+  EXPECT_EQ("file.in:24:2: error: message\n"
+            "12\n"
+            " ^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationPastEndOf255ByteBuffer) {
+  setMainBuffer(STRING_LITERAL_253_BYTES   // first 253 bytes
+                "12"                       // + 2 = 255 bytes
+                , "file.in");
+  printMessage(getLoc(255), SourceMgr::DK_Error, "message", None, None);
+  EXPECT_EQ("file.in:24:3: error: message\n"
+            "12\n"
+            "  ^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationBeforeEndOf255ByteBufferEndingInNewline) {
+  setMainBuffer(STRING_LITERAL_253_BYTES   // first 253 bytes
+                "1\n"                      // + 2 = 255 bytes
+                , "file.in");
+  printMessage(getLoc(253), SourceMgr::DK_Error, "message", None, None);
+  EXPECT_EQ("file.in:24:1: error: message\n"
+            "1\n"
+            "^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationAtEndOf255ByteBufferEndingInNewline) {
+  setMainBuffer(STRING_LITERAL_253_BYTES   // first 253 bytes
+                "1\n"                      // + 2 = 255 bytes
+                , "file.in");
+  printMessage(getLoc(254), SourceMgr::DK_Error, "message", None, None);
+  EXPECT_EQ("file.in:24:2: error: message\n"
+            "1\n"
+            " ^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationPastEndOf255ByteBufferEndingInNewline) {
+  setMainBuffer(STRING_LITERAL_253_BYTES   // first 253 bytes
+                "1\n"                      // + 2 = 255 bytes
+                , "file.in");
+  printMessage(getLoc(255), SourceMgr::DK_Error, "message", None, None);
+  EXPECT_EQ("file.in:25:1: error: message\n"
+            "\n"
+            "^\n",
+            Output);
+}
+
+//===----------------------------------------------------------------------===//
+// 256-byte buffer tests
+//===----------------------------------------------------------------------===//
+
+TEST_F(SourceMgrTest, LocationBeforeEndOf256ByteBuffer) {
+  setMainBuffer(STRING_LITERAL_253_BYTES   // first 253 bytes
+                "123"                      // + 3 = 256 bytes
+                , "file.in");
+  printMessage(getLoc(254), SourceMgr::DK_Error, "message", None, None);
+  EXPECT_EQ("file.in:24:2: error: message\n"
+            "123\n"
+            " ^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationAtEndOf256ByteBuffer) {
+  setMainBuffer(STRING_LITERAL_253_BYTES   // first 253 bytes
+                "123"                      // + 3 = 256 bytes
+                , "file.in");
+  printMessage(getLoc(255), SourceMgr::DK_Error, "message", None, None);
+  EXPECT_EQ("file.in:24:3: error: message\n"
+            "123\n"
+            "  ^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationPastEndOf256ByteBuffer) {
+  setMainBuffer(STRING_LITERAL_253_BYTES   // first 253 bytes
+                "123"                      // + 3 = 256 bytes
+                , "file.in");
+  printMessage(getLoc(256), SourceMgr::DK_Error, "message", None, None);
+  EXPECT_EQ("file.in:24:4: error: message\n"
+            "123\n"
+            "   ^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationBeforeEndOf256ByteBufferEndingInNewline) {
+  setMainBuffer(STRING_LITERAL_253_BYTES   // first 253 bytes
+                "12\n"                     // + 3 = 256 bytes
+                , "file.in");
+  printMessage(getLoc(254), SourceMgr::DK_Error, "message", None, None);
+  EXPECT_EQ("file.in:24:2: error: message\n"
+            "12\n"
+            " ^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationAtEndOf256ByteBufferEndingInNewline) {
+  setMainBuffer(STRING_LITERAL_253_BYTES   // first 253 bytes
+                "12\n"                     // + 3 = 256 bytes
+                , "file.in");
+  printMessage(getLoc(255), SourceMgr::DK_Error, "message", None, None);
+  EXPECT_EQ("file.in:24:3: error: message\n"
+            "12\n"
+            "  ^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationPastEndOf256ByteBufferEndingInNewline) {
+  setMainBuffer(STRING_LITERAL_253_BYTES   // first 253 bytes
+                "12\n"                     // + 3 = 256 bytes
+                , "file.in");
+  printMessage(getLoc(256), SourceMgr::DK_Error, "message", None, None);
+  EXPECT_EQ("file.in:25:1: error: message\n"
+            "\n"
+            "^\n",
+            Output);
+}
+
+//===----------------------------------------------------------------------===//
+// 257-byte buffer tests
+//===----------------------------------------------------------------------===//
+
+TEST_F(SourceMgrTest, LocationBeforeEndOf257ByteBuffer) {
+  setMainBuffer(STRING_LITERAL_253_BYTES   // first 253 bytes
+                "1234"                     // + 4 = 257 bytes
+                , "file.in");
+  printMessage(getLoc(255), SourceMgr::DK_Error, "message", None, None);
+  EXPECT_EQ("file.in:24:3: error: message\n"
+            "1234\n"
+            "  ^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationAtEndOf257ByteBuffer) {
+  setMainBuffer(STRING_LITERAL_253_BYTES   // first 253 bytes
+                "1234"                     // + 4 = 257 bytes
+                , "file.in");
+  printMessage(getLoc(256), SourceMgr::DK_Error, "message", None, None);
+  EXPECT_EQ("file.in:24:4: error: message\n"
+            "1234\n"
+            "   ^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationPastEndOf257ByteBuffer) {
+  setMainBuffer(STRING_LITERAL_253_BYTES   // first 253 bytes
+                "1234"                     // + 4 = 257 bytes
+                , "file.in");
+  printMessage(getLoc(257), SourceMgr::DK_Error, "message", None, None);
+  EXPECT_EQ("file.in:24:5: error: message\n"
+            "1234\n"
+            "    ^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationBeforeEndOf257ByteBufferEndingInNewline) {
+  setMainBuffer(STRING_LITERAL_253_BYTES   // first 253 bytes
+                "123\n"                    // + 4 = 257 bytes
+                , "file.in");
+  printMessage(getLoc(255), SourceMgr::DK_Error, "message", None, None);
+  EXPECT_EQ("file.in:24:3: error: message\n"
+            "123\n"
+            "  ^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationAtEndOf257ByteBufferEndingInNewline) {
+  setMainBuffer(STRING_LITERAL_253_BYTES   // first 253 bytes
+                "123\n"                    // + 4 = 257 bytes
+                , "file.in");
+  printMessage(getLoc(256), SourceMgr::DK_Error, "message", None, None);
+  EXPECT_EQ("file.in:24:4: error: message\n"
+            "123\n"
+            "   ^\n",
+            Output);
+}
+
+TEST_F(SourceMgrTest, LocationPastEndOf257ByteBufferEndingInNewline) {
+  setMainBuffer(STRING_LITERAL_253_BYTES   // first 253 bytes
+                "123\n"                    // + 4 = 257 bytes
+                , "file.in");
+  printMessage(getLoc(257), SourceMgr::DK_Error, "message", None, None);
+  EXPECT_EQ("file.in:25:1: error: message\n"
+            "\n"
+            "^\n",
+            Output);
+}
+
 TEST_F(SourceMgrTest, BasicRange) {
   setMainBuffer("aaa bbb\nccc ddd\n", "file.in");
   printMessage(getLoc(4), SourceMgr::DK_Error, "message", getRange(4, 3), None);