diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -90,6 +90,35 @@ return CK == C_User_ModuleMap || CK == C_System_ModuleMap; } + /// Mapping of line offsets into a source file. This does not own the storage + /// for the line numbers. + class LineOffsetMapping { + public: + explicit operator bool() const { return Storage; } + unsigned size() const { + assert(Storage); + return Storage[0]; + } + ArrayRef getLines() const { + assert(Storage); + return ArrayRef(Storage + 1, Storage + 1 + size()); + } + const unsigned *begin() const { return getLines().begin(); } + const unsigned *end() const { return getLines().end(); } + const unsigned &operator[](int I) const { return getLines()[I]; } + + static LineOffsetMapping get(llvm::MemoryBufferRef Buffer, + llvm::BumpPtrAllocator &Alloc); + + LineOffsetMapping() = default; + LineOffsetMapping(ArrayRef LineOffsets, + llvm::BumpPtrAllocator &Alloc); + + private: + /// First element is the size, followed by elements at off-by-one indexes. + unsigned *Storage = nullptr; + }; + /// One instance of this struct is kept for every file loaded or used. /// /// This object owns the MemoryBuffer object. @@ -115,14 +144,9 @@ /// A bump pointer allocated array of offsets for each source line. /// - /// This is lazily computed. This is owned by the SourceManager + /// This is lazily computed. The lines are owned by the SourceManager /// BumpPointerAllocator object. - unsigned *SourceLineCache = nullptr; - - /// The number of lines in this ContentCache. - /// - /// This is only valid if SourceLineCache is non-null. - unsigned NumLines = 0; + LineOffsetMapping SourceLineCache; /// Indicates whether the buffer itself was provided to override /// the actual file contents. @@ -157,10 +181,8 @@ OrigEntry = RHS.OrigEntry; ContentsEntry = RHS.ContentsEntry; - assert(!RHS.Buffer && RHS.SourceLineCache == nullptr && + assert(!RHS.Buffer && !RHS.SourceLineCache && "Passed ContentCache object cannot own a buffer."); - - NumLines = RHS.NumLines; } ContentCache &operator=(const ContentCache& RHS) = delete; diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -1200,10 +1200,10 @@ const char *Buf = MemBuf->getBufferStart(); // See if we just calculated the line number for this FilePos and can use // that to lookup the start of the line instead of searching for it. - if (LastLineNoFileIDQuery == FID && - LastLineNoContentCache->SourceLineCache != nullptr && - LastLineNoResult < LastLineNoContentCache->NumLines) { - unsigned *SourceLineCache = LastLineNoContentCache->SourceLineCache; + if (LastLineNoFileIDQuery == FID && LastLineNoContentCache->SourceLineCache && + LastLineNoResult < LastLineNoContentCache->SourceLineCache.size()) { + const unsigned *SourceLineCache = + LastLineNoContentCache->SourceLineCache.begin(); unsigned LineStart = SourceLineCache[LastLineNoResult - 1]; unsigned LineEnd = SourceLineCache[LastLineNoResult]; if (FilePos >= LineStart && FilePos < LineEnd) { @@ -1274,6 +1274,11 @@ if (Invalid) return; + FI->SourceLineCache = LineOffsetMapping::get(*Buffer, Alloc); +} + +LineOffsetMapping LineOffsetMapping::get(llvm::MemoryBufferRef Buffer, + llvm::BumpPtrAllocator &Alloc) { // Find the file offsets of all of the *physical* source lines. This does // not look at trigraphs, escaped newlines, or anything else tricky. SmallVector LineOffsets; @@ -1281,8 +1286,8 @@ // Line #1 starts at char 0. LineOffsets.push_back(0); - const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart(); - const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd(); + const unsigned char *Buf = (const unsigned char *)Buffer.getBufferStart(); + const unsigned char *End = (const unsigned char *)Buffer.getBufferEnd(); const std::size_t BufLen = End - Buf; unsigned I = 0; while (I < BufLen) { @@ -1297,10 +1302,14 @@ ++I; } - // Copy the offsets into the FileInfo structure. - FI->NumLines = LineOffsets.size(); - FI->SourceLineCache = Alloc.Allocate(LineOffsets.size()); - std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache); + return LineOffsetMapping(LineOffsets, Alloc); +} + +LineOffsetMapping::LineOffsetMapping(ArrayRef LineOffsets, + llvm::BumpPtrAllocator &Alloc) + : Storage(Alloc.Allocate(LineOffsets.size() + 1)) { + Storage[0] = LineOffsets.size(); + std::copy(LineOffsets.begin(), LineOffsets.end(), Storage + 1); } /// getLineNumber - Given a SourceLocation, return the spelling line number @@ -1344,9 +1353,9 @@ // Okay, we know we have a line number table. Do a binary search to find the // line number that this character position lands on. - unsigned *SourceLineCache = Content->SourceLineCache; - unsigned *SourceLineCacheStart = SourceLineCache; - unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines; + const unsigned *SourceLineCache = Content->SourceLineCache.begin(); + const unsigned *SourceLineCacheStart = SourceLineCache; + const unsigned *SourceLineCacheEnd = Content->SourceLineCache.end(); unsigned QueriedFilePos = FilePos+1; @@ -1385,13 +1394,13 @@ } } } else { - if (LastLineNoResult < Content->NumLines) + if (LastLineNoResult < Content->SourceLineCache.size()) SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1; } } - unsigned *Pos - = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos); + const unsigned *Pos = + std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos); unsigned LineNo = Pos-SourceLineCacheStart; LastLineNoFileIDQuery = FID; @@ -1693,7 +1702,7 @@ if (!Buffer) return SourceLocation(); - if (Line > Content->NumLines) { + if (Line > Content->SourceLineCache.size()) { unsigned Size = Buffer->getBufferSize(); if (Size > 0) --Size; @@ -2105,7 +2114,7 @@ unsigned NumLineNumsComputed = 0; unsigned NumFileBytesMapped = 0; for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){ - NumLineNumsComputed += I->second->SourceLineCache != nullptr; + NumLineNumsComputed += bool(I->second->SourceLineCache); NumFileBytesMapped += I->second->getSizeBytesMapped(); } unsigned NumMacroArgsComputed = MacroArgsCacheMap.size(); diff --git a/clang/lib/Lex/ScratchBuffer.cpp b/clang/lib/Lex/ScratchBuffer.cpp --- a/clang/lib/Lex/ScratchBuffer.cpp +++ b/clang/lib/Lex/ScratchBuffer.cpp @@ -41,7 +41,7 @@ &SourceMgr.getSLocEntry(SourceMgr.getFileID(BufferStartLoc)) .getFile() .getContentCache()); - ContentCache->SourceLineCache = nullptr; + ContentCache->SourceLineCache = SrcMgr::LineOffsetMapping(); } // Prefix the token with a \n, so that it looks like it is the first thing on diff --git a/clang/unittests/Basic/CMakeLists.txt b/clang/unittests/Basic/CMakeLists.txt --- a/clang/unittests/Basic/CMakeLists.txt +++ b/clang/unittests/Basic/CMakeLists.txt @@ -6,6 +6,7 @@ CharInfoTest.cpp DiagnosticTest.cpp FileManagerTest.cpp + LineOffsetMappingTest.cpp SourceManagerTest.cpp ) diff --git a/clang/unittests/Basic/LineOffsetMappingTest.cpp b/clang/unittests/Basic/LineOffsetMappingTest.cpp new file mode 100644 --- /dev/null +++ b/clang/unittests/Basic/LineOffsetMappingTest.cpp @@ -0,0 +1,79 @@ +//===- unittests/Basic/LineOffsetMappingTest.cpp - Test LineOffsetMapping -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Basic/SourceManager.h" +#include "gtest/gtest.h" + +using namespace clang; +using namespace clang::SrcMgr; +using namespace llvm; + +namespace { + +TEST(LineOffsetMappingTest, empty) { + LineOffsetMapping Mapping; + EXPECT_FALSE(Mapping); + +#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST + EXPECT_DEATH((void)Mapping.getLines(), "Assertion"); +#endif +} + +TEST(LineOffsetMappingTest, construct) { + BumpPtrAllocator Alloc; + unsigned Offsets[] = {0, 10, 20}; + LineOffsetMapping Mapping(Offsets, Alloc); + EXPECT_EQ(3u, Mapping.size()); + EXPECT_EQ(0u, Mapping[0]); + EXPECT_EQ(10u, Mapping[1]); + EXPECT_EQ(20u, Mapping[2]); + +#if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST + EXPECT_DEATH((void)Mapping[3], "Assertion"); +#endif +} + +TEST(LineOffsetMappingTest, constructTwo) { + // Confirm allocation size is big enough, convering an off-by-one bug. + BumpPtrAllocator Alloc; + unsigned Offsets1[] = {0, 10}; + unsigned Offsets2[] = {0, 20}; + LineOffsetMapping Mapping1(Offsets1, Alloc); + LineOffsetMapping Mapping2(Offsets2, Alloc); + + // Need to check Mapping1 *after* building Mapping2. + EXPECT_EQ(2u, Mapping1.size()); + EXPECT_EQ(0u, Mapping1[0]); + EXPECT_EQ(10u, Mapping1[1]); + EXPECT_EQ(2u, Mapping2.size()); + EXPECT_EQ(0u, Mapping2[0]); + EXPECT_EQ(20u, Mapping2[1]); +} + +TEST(LineOffsetMappingTest, get) { + BumpPtrAllocator Alloc; + StringRef Source = "first line\n" + "second line\n"; + auto Mapping = LineOffsetMapping::get(MemoryBufferRef(Source, ""), Alloc); + EXPECT_EQ(3u, Mapping.size()); + EXPECT_EQ(0u, Mapping[0]); + EXPECT_EQ(11u, Mapping[1]); + EXPECT_EQ(23u, Mapping[2]); +} + +TEST(LineOffsetMappingTest, getMissingFinalNewline) { + BumpPtrAllocator Alloc; + StringRef Source = "first line\n" + "second line"; + auto Mapping = LineOffsetMapping::get(MemoryBufferRef(Source, ""), Alloc); + EXPECT_EQ(2u, Mapping.size()); + EXPECT_EQ(0u, Mapping[0]); + EXPECT_EQ(11u, Mapping[1]); +} + +} // end namespace