Index: clang-tools-extra/trunk/clangd/DraftStore.cpp =================================================================== --- clang-tools-extra/trunk/clangd/DraftStore.cpp +++ clang-tools-extra/trunk/clangd/DraftStore.cpp @@ -77,8 +77,17 @@ End, Start), llvm::errc::invalid_argument); - if (Change.rangeLength && - (ssize_t)(*EndIndex - *StartIndex) != *Change.rangeLength) + // Since the range length between two LSP positions is dependent on the + // contents of the buffer we compute the range length between the start and + // end position ourselves and compare it to the range length of the LSP + // message to verify the buffers of the client and server are in sync. + + // EndIndex and StartIndex are in bytes, but Change.rangeLength is in UTF-16 + // code units. + ssize_t ComputedRangeLength = + lspLength(Contents.substr(*StartIndex, *EndIndex - *StartIndex)); + + if (Change.rangeLength && ComputedRangeLength != *Change.rangeLength) return make_error( formatv("Change's rangeLength ({0}) doesn't match the " "computed range length ({1}).", Index: clang-tools-extra/trunk/clangd/SourceCode.h =================================================================== --- clang-tools-extra/trunk/clangd/SourceCode.h +++ clang-tools-extra/trunk/clangd/SourceCode.h @@ -23,6 +23,10 @@ namespace clangd { +// Counts the number of UTF-16 code units needed to represent a string (LSP +// specifies string lengths in UTF-16 code units). +size_t lspLength(StringRef Code); + /// Turn a [line, column] pair into an offset in Code. /// /// If P.character exceeds the line length, returns the offset at end-of-line. Index: clang-tools-extra/trunk/clangd/SourceCode.cpp =================================================================== --- clang-tools-extra/trunk/clangd/SourceCode.cpp +++ clang-tools-extra/trunk/clangd/SourceCode.cpp @@ -67,13 +67,12 @@ return std::min(Result, U8.size()); } -// Counts the number of UTF-16 code units needed to represent a string. // Like most strings in clangd, the input is UTF-8 encoded. -static size_t utf16Len(StringRef U8) { +size_t lspLength(StringRef Code) { // A codepoint takes two UTF-16 code unit if it's astral (outside BMP). // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx. size_t Count = 0; - iterateCodepoints(U8, [&](int U8Len, int U16Len) { + iterateCodepoints(Code, [&](int U8Len, int U16Len) { Count += U16Len; return false; }); @@ -123,7 +122,7 @@ size_t StartOfLine = (PrevNL == StringRef::npos) ? 0 : (PrevNL + 1); Position Pos; Pos.line = Lines; - Pos.character = utf16Len(Before.substr(StartOfLine)); + Pos.character = lspLength(Before.substr(StartOfLine)); return Pos; } @@ -139,7 +138,7 @@ if (!Invalid) { auto ColumnInBytes = SM.getColumnNumber(FID, Offset) - 1; auto LineSoFar = Code.substr(Offset - ColumnInBytes, ColumnInBytes); - P.character = utf16Len(LineSoFar); + P.character = lspLength(LineSoFar); } return P; } Index: clang-tools-extra/trunk/unittests/clangd/SourceCodeTests.cpp =================================================================== --- clang-tools-extra/trunk/unittests/clangd/SourceCodeTests.cpp +++ clang-tools-extra/trunk/unittests/clangd/SourceCodeTests.cpp @@ -42,6 +42,16 @@ return range; } +TEST(SourceCodeTests, lspLength) { + EXPECT_EQ(lspLength(""), 0UL); + EXPECT_EQ(lspLength("ascii"), 5UL); + // BMP + EXPECT_EQ(lspLength("↓"), 1UL); + EXPECT_EQ(lspLength("¥"), 1UL); + // astral + EXPECT_EQ(lspLength("😂"), 2UL); +} + TEST(SourceCodeTests, PositionToOffset) { // line out of bounds EXPECT_THAT_EXPECTED(positionToOffset(File, position(-1, 2)), Failed());