Index: clang-tidy/tool/clang-tidy-diff.py =================================================================== --- clang-tidy/tool/clang-tidy-diff.py +++ clang-tidy/tool/clang-tidy-diff.py @@ -36,11 +36,10 @@ import threading import traceback -yaml_imported = True try: import yaml except ImportError: - yaml_imported = False + yaml = None is_py2 = sys.version[0] == '2' @@ -144,7 +143,7 @@ default='') parser.add_argument('-path', dest='build_path', help='Path used to read a compile command database.') - if yaml_imported: + if yaml: parser.add_argument('-export-fixes', metavar='FILE', dest='export_fixes', help='Create a yaml file to store suggested fixes in, ' 'which can be applied with clang-apply-replacements.') @@ -204,7 +203,7 @@ max_task_count = min(len(lines_by_file), max_task_count) tmpdir = None - if yaml_imported and args.export_fixes: + if yaml and args.export_fixes: tmpdir = tempfile.mkdtemp() # Tasks for clang-tidy. @@ -238,7 +237,7 @@ # Run clang-tidy on files containing changes. command = [args.clang_tidy_binary] command.append('-line-filter=' + line_filter_json) - if yaml_imported and args.export_fixes: + if yaml and args.export_fixes: # Get a temporary file. We immediately close the handle so clang-tidy can # overwrite it. (handle, tmp_name) = tempfile.mkstemp(suffix='.yaml', dir=tmpdir) @@ -253,7 +252,7 @@ # Wait for all threads to be done. task_queue.join() - if yaml_imported and args.export_fixes: + if yaml and args.export_fixes: print('Writing fixes to ' + args.export_fixes + ' ...') try: merge_replacement_files(tmpdir, args.export_fixes) Index: clang-tidy/tool/run-clang-tidy.py =================================================================== --- clang-tidy/tool/run-clang-tidy.py +++ clang-tidy/tool/run-clang-tidy.py @@ -47,7 +47,11 @@ import tempfile import threading import traceback -import yaml + +try: + import yaml +except ImportError: + yaml = None is_py2 = sys.version[0] == '2' @@ -199,9 +203,10 @@ 'headers to output diagnostics from. 
Diagnostics from ' 'the main file of each translation unit are always ' 'displayed.') - parser.add_argument('-export-fixes', metavar='filename', dest='export_fixes', - help='Create a yaml file to store suggested fixes in, ' - 'which can be applied with clang-apply-replacements.') + if yaml: + parser.add_argument('-export-fixes', metavar='filename', dest='export_fixes', + help='Create a yaml file to store suggested fixes in, ' + 'which can be applied with clang-apply-replacements.') parser.add_argument('-j', type=int, default=0, help='number of tidy instances to be run in parallel.') parser.add_argument('files', nargs='*', default=['.*'], @@ -254,7 +259,7 @@ max_task = multiprocessing.cpu_count() tmpdir = None - if args.fix or args.export_fixes: + if args.fix or (yaml and args.export_fixes): check_clang_apply_replacements_binary(args) tmpdir = tempfile.mkdtemp() @@ -292,7 +297,7 @@ shutil.rmtree(tmpdir) os.kill(0, 9) - if args.export_fixes: + if yaml and args.export_fixes: print('Writing fixes to ' + args.export_fixes + ' ...') try: merge_replacement_files(tmpdir, args.export_fixes) Index: clangd/ClangdLSPServer.h =================================================================== --- clangd/ClangdLSPServer.h +++ clangd/ClangdLSPServer.h @@ -40,6 +40,7 @@ ClangdLSPServer(Transport &Transp, const FileSystemProvider &FSProvider, const clangd::CodeCompleteOptions &CCOpts, llvm::Optional CompileCommandsDir, bool UseDirBasedCDB, + llvm::Optional ForcedOffsetEncoding, const ClangdServer::Options &Opts); ~ClangdLSPServer(); @@ -165,6 +166,7 @@ // It is destroyed before run() returns, to ensure worker threads exit. 
ClangdServer::Options ClangdServerOpts; llvm::Optional Server; + llvm::Optional NegotiatedOffsetEncoding; }; } // namespace clangd } // namespace clang Index: clangd/ClangdLSPServer.cpp =================================================================== --- clangd/ClangdLSPServer.cpp +++ clangd/ClangdLSPServer.cpp @@ -13,6 +13,7 @@ #include "Trace.h" #include "URI.h" #include "clang/Tooling/Core/Replacement.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" @@ -93,6 +94,7 @@ MessageHandler(ClangdLSPServer &Server) : Server(Server) {} bool onNotify(llvm::StringRef Method, llvm::json::Value Params) override { + WithContext HandlerContext(handlerContext()); log("<-- {0}", Method); if (Method == "exit") return false; @@ -109,6 +111,7 @@ bool onCall(llvm::StringRef Method, llvm::json::Value Params, llvm::json::Value ID) override { + WithContext HandlerContext(handlerContext()); // Calls can be canceled by the client. Add cancellation context. WithContext WithCancel(cancelableRequestContext(ID)); trace::Span Tracer(Method); @@ -129,6 +132,7 @@ bool onReply(llvm::json::Value ID, llvm::Expected Result) override { + WithContext HandlerContext(handlerContext()); // We ignore replies, just log them. if (Result) log("<-- reply({0})", ID); @@ -259,6 +263,13 @@ if (It != RequestCancelers.end()) It->second.first(); // Invoke the canceler. } + + Context handlerContext() const { + return Context::current().derive( + kCurrentOffsetEncoding, + Server.NegotiatedOffsetEncoding.getValueOr(OffsetEncoding::UTF16)); + } + // We run cancelable requests in a context that does two things: // - allows cancellation using RequestCancelers[ID] // - cleans up the entry in RequestCancelers when it's no longer needed @@ -302,6 +313,20 @@ void ClangdLSPServer::onInitialize(const InitializeParams &Params, Callback Reply) { + // Determine character encoding first as it affects constructed ClangdServer. 
+ if (Params.capabilities.offsetEncoding && !NegotiatedOffsetEncoding) { + NegotiatedOffsetEncoding = OffsetEncoding::UTF16; // fallback + for (OffsetEncoding Supported : *Params.capabilities.offsetEncoding) + if (Supported != OffsetEncoding::UnsupportedEncoding) { + NegotiatedOffsetEncoding = Supported; + break; + } + } + llvm::Optional WithOffsetEncoding; + if (NegotiatedOffsetEncoding) + WithOffsetEncoding.emplace(kCurrentOffsetEncoding, + *NegotiatedOffsetEncoding); + if (Params.rootUri && *Params.rootUri) ClangdServerOpts.WorkspaceRoot = Params.rootUri->file(); else if (Params.rootPath && !Params.rootPath->empty()) @@ -331,7 +356,7 @@ SupportsHierarchicalDocumentSymbol = Params.capabilities.HierarchicalDocumentSymbol; SupportFileStatus = Params.initializationOptions.FileStatus; - Reply(llvm::json::Object{ + llvm::json::Object Result{ {{"capabilities", llvm::json::Object{ {"textDocumentSync", (int)TextDocumentSyncKind::Incremental}, @@ -369,7 +394,10 @@ ExecuteCommandParams::CLANGD_APPLY_TWEAK}}, }}, {"typeHierarchyProvider", true}, - }}}}); + }}}}; + if (NegotiatedOffsetEncoding) + Result["offsetEncoding"] = *NegotiatedOffsetEncoding; + Reply(std::move(Result)); } void ClangdLSPServer::onShutdown(const ShutdownParams &Params, @@ -875,19 +903,19 @@ std::move(Reply)); } -ClangdLSPServer::ClangdLSPServer(class Transport &Transp, - const FileSystemProvider &FSProvider, - const clangd::CodeCompleteOptions &CCOpts, - llvm::Optional CompileCommandsDir, - bool UseDirBasedCDB, - const ClangdServer::Options &Opts) +ClangdLSPServer::ClangdLSPServer( + class Transport &Transp, const FileSystemProvider &FSProvider, + const clangd::CodeCompleteOptions &CCOpts, + llvm::Optional CompileCommandsDir, bool UseDirBasedCDB, + llvm::Optional ForcedOffsetEncoding, + const ClangdServer::Options &Opts) : Transp(Transp), MsgHandler(new MessageHandler(*this)), FSProvider(FSProvider), CCOpts(CCOpts), SupportedSymbolKinds(defaultSymbolKinds()), 
SupportedCompletionItemKinds(defaultCompletionItemKinds()), UseDirBasedCDB(UseDirBasedCDB), - CompileCommandsDir(std::move(CompileCommandsDir)), - ClangdServerOpts(Opts) { + CompileCommandsDir(std::move(CompileCommandsDir)), ClangdServerOpts(Opts), + NegotiatedOffsetEncoding(ForcedOffsetEncoding) { // clang-format off MsgHandler->bind("initialize", &ClangdLSPServer::onInitialize); MsgHandler->bind("shutdown", &ClangdLSPServer::onShutdown); Index: clangd/Protocol.h =================================================================== --- clangd/Protocol.h +++ clangd/Protocol.h @@ -28,6 +28,7 @@ #include "clang/Index/IndexSymbol.h" #include "llvm/ADT/Optional.h" #include "llvm/Support/JSON.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -338,6 +339,21 @@ // https://github.com/Microsoft/language-server-protocol/issues/344 SymbolKind indexSymbolKindToSymbolKind(index::SymbolKind Kind); +// Determines the encoding used to measure offsets and lengths of source in LSP. +enum class OffsetEncoding { + // Any string is legal on the wire. Unrecognized encodings parse as this. + UnsupportedEncoding, + // Length counts code units of UTF-16 encoded text. (Standard LSP behavior). + UTF16, + // Length counts bytes of UTF-8 encoded text. (Clangd extension). + UTF8, + // Length counts codepoints in unicode text. (Clangd extension). + UTF32, +}; +llvm::json::Value toJSON(const OffsetEncoding &); +bool fromJSON(const llvm::json::Value &, OffsetEncoding &); +llvm::raw_ostream &operator<<(llvm::raw_ostream &, OffsetEncoding OS); + // This struct doesn't mirror LSP! // The protocol defines deeply nested structures for client capabilities. // Instead of mapping them all, this just parses out the bits we care about. @@ -369,6 +385,9 @@ /// Client supports CodeAction return value for textDocument/codeAction. /// textDocument.codeAction.codeActionLiteralSupport. bool CodeActionStructure = false; + + /// Supported encodings for LSP character offsets. (clangd extension). 
+ llvm::Optional> offsetEncoding; }; bool fromJSON(const llvm::json::Value &, ClientCapabilities &); Index: clangd/Protocol.cpp =================================================================== --- clangd/Protocol.cpp +++ clangd/Protocol.cpp @@ -16,6 +16,7 @@ #include "clang/Basic/LLVM.h" #include "llvm/ADT/Hashing.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/JSON.h" @@ -311,6 +312,11 @@ } } } + if (auto *OffsetEncoding = O->get("offsetEncoding")) { + R.offsetEncoding.emplace(); + if (!fromJSON(*OffsetEncoding, *R.offsetEncoding)) + return false; + } return true; } @@ -932,5 +938,33 @@ return fromJSON(Params, Base); } +static const char *toString(OffsetEncoding OE) { + switch (OE) { + case OffsetEncoding::UTF8: + return "utf-8"; + case OffsetEncoding::UTF16: + return "utf-16"; + case OffsetEncoding::UTF32: + return "utf-32"; + case OffsetEncoding::UnsupportedEncoding: + return "unknown"; + } +} +llvm::json::Value toJSON(const OffsetEncoding &OE) { return toString(OE); } +bool fromJSON(const llvm::json::Value &V, OffsetEncoding &OE) { + auto Str = V.getAsString(); + if (!Str) + return false; + OE = llvm::StringSwitch(*Str) + .Case("utf-8", OffsetEncoding::UTF8) + .Case("utf-16", OffsetEncoding::UTF16) + .Case("utf-32", OffsetEncoding::UTF32) + .Default(OffsetEncoding::UnsupportedEncoding); + return true; +} +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, OffsetEncoding Enc) { + return OS << toString(Enc); +} + } // namespace clangd } // namespace clang Index: clangd/SourceCode.h =================================================================== --- clangd/SourceCode.h +++ clangd/SourceCode.h @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_SOURCECODE_H +#include "Context.h" #include "Protocol.h" 
#include "clang/Basic/Diagnostic.h" #include "clang/Basic/LangOptions.h" @@ -34,8 +35,14 @@ FileDigest digest(StringRef Content); Optional digestFile(const SourceManager &SM, FileID FID); +// This context variable controls the behavior of functions in this file +// that convert between LSP offsets and native clang byte offsets. +// If not set, defaults to UTF-16 for backwards-compatibility. +extern Key kCurrentOffsetEncoding; + // Counts the number of UTF-16 code units needed to represent a string (LSP // specifies string lengths in UTF-16 code units). +// Use of UTF-16 may be overridden by kCurrentOffsetEncoding. size_t lspLength(StringRef Code); /// Turn a [line, column] pair into an offset in Code. Index: clangd/SourceCode.cpp =================================================================== --- clangd/SourceCode.cpp +++ clangd/SourceCode.cpp @@ -7,7 +7,9 @@ //===----------------------------------------------------------------------===// #include "SourceCode.h" +#include "Context.h" #include "Logger.h" +#include "Protocol.h" #include "clang/AST/ASTContext.h" #include "clang/Basic/SourceManager.h" #include "clang/Lex/Lexer.h" @@ -15,6 +17,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Path.h" namespace clang { @@ -28,6 +31,8 @@ // Returns true if CB returned true, false if we hit the end of string. template static bool iterateCodepoints(llvm::StringRef U8, const Callback &CB) { + // A codepoint takes two UTF-16 code units if it's astral (outside BMP). + // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx. for (size_t I = 0; I < U8.size();) { unsigned char C = static_cast(U8[I]); if (LLVM_LIKELY(!(C & 0x80))) { // ASCII character. @@ -51,31 +56,75 @@ return false; } -// Returns the offset into the string that matches \p Units UTF-16 code units. 
-// Conceptually, this converts to UTF-16, truncates to CodeUnits, converts back -// to UTF-8, and returns the length in bytes. -static size_t measureUTF16(llvm::StringRef U8, int U16Units, bool &Valid) { +// Returns the byte offset into the string that is an offset of \p Units in +// the specified encoding. +// Conceptually, this converts to the encoding, truncates to CodeUnits, +// converts back to UTF-8, and returns the length in bytes. +static size_t measureUnits(llvm::StringRef U8, int Units, OffsetEncoding Enc, + bool &Valid) { + Valid = Units >= 0; + if (Units <= 0) + return 0; size_t Result = 0; - Valid = U16Units == 0 || iterateCodepoints(U8, [&](int U8Len, int U16Len) { - Result += U8Len; - U16Units -= U16Len; - return U16Units <= 0; - }); - if (U16Units < 0) // Offset was into the middle of a surrogate pair. - Valid = false; + switch (Enc) { + case OffsetEncoding::UTF8: + Result = Units; + break; + case OffsetEncoding::UTF16: + Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) { + Result += U8Len; + Units -= U16Len; + return Units <= 0; + }); + if (Units < 0) // Offset in the middle of a surrogate pair. + Valid = false; + break; + case OffsetEncoding::UTF32: + Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) { + Result += U8Len; + Units--; + return Units <= 0; + }); + break; + case OffsetEncoding::UnsupportedEncoding: + llvm_unreachable("unsupported encoding"); + } // Don't return an out-of-range index if we overran. - return std::min(Result, U8.size()); + if (Result > U8.size()) { + Valid = false; + return U8.size(); + } + return Result; +} + +Key kCurrentOffsetEncoding; +static OffsetEncoding lspEncoding() { + auto *Enc = Context::current().get(kCurrentOffsetEncoding); + return Enc ? *Enc : OffsetEncoding::UTF16; } // Like most strings in clangd, the input is UTF-8 encoded. size_t lspLength(llvm::StringRef Code) { - // A codepoint takes two UTF-16 code unit if it's astral (outside BMP). 
- // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx. size_t Count = 0; - iterateCodepoints(Code, [&](int U8Len, int U16Len) { - Count += U16Len; - return false; - }); + switch (lspEncoding()) { + case OffsetEncoding::UTF8: + Count = Code.size(); + break; + case OffsetEncoding::UTF16: + iterateCodepoints(Code, [&](int U8Len, int U16Len) { + Count += U16Len; + return false; + }); + break; + case OffsetEncoding::UTF32: + iterateCodepoints(Code, [&](int U8Len, int U16Len) { + ++Count; + return false; + }); + break; + case OffsetEncoding::UnsupportedEncoding: + llvm_unreachable("unsupported encoding"); + } return Count; } @@ -98,20 +147,18 @@ llvm::errc::invalid_argument); StartOfLine = NextNL + 1; } + StringRef Line = + Code.substr(StartOfLine).take_until([](char C) { return C == '\n'; }); - size_t NextNL = Code.find('\n', StartOfLine); - if (NextNL == llvm::StringRef::npos) - NextNL = Code.size(); - + // P.character may be in UTF-16, transcode if necessary. bool Valid; - size_t ByteOffsetInLine = measureUTF16( - Code.substr(StartOfLine, NextNL - StartOfLine), P.character, Valid); + size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid); if (!Valid && !AllowColumnsBeyondLineLength) return llvm::make_error( - llvm::formatv("UTF-16 offset {0} is invalid for line {1}", P.character, - P.line), + llvm::formatv("{0} offset {1} is invalid for line {2}", lspEncoding(), + P.character, P.line), llvm::errc::invalid_argument); - return StartOfLine + ByteOffsetInLine; + return StartOfLine + ByteInLine; } Position offsetToPosition(llvm::StringRef Code, size_t Offset) { Index: clangd/index/IndexAction.cpp =================================================================== --- clangd/index/IndexAction.cpp +++ clangd/index/IndexAction.cpp @@ -9,7 +9,6 @@ #include "IndexAction.h" #include "index/SymbolOrigin.h" #include "clang/Frontend/CompilerInstance.h" -#include "clang/Index/IndexDataConsumer.h" #include "clang/Index/IndexingAction.h" 
#include "clang/Tooling/Tooling.h" @@ -136,6 +135,11 @@ bool BeginInvocation(CompilerInstance &CI) override { // We want all comments, not just the doxygen ones. CI.getLangOpts().CommentOpts.ParseAllComments = true; + // Index the whole file even if there are warnings and -Werror is set. + // Avoids some analyses too. Set in two places as we're late to the party. + CI.getDiagnosticOpts().IgnoreWarnings = true; + CI.getDiagnostics().setIgnoreAllWarnings(true); + return WrapperFrontendAction::BeginInvocation(CI); } Index: clangd/index/SymbolLocation.h =================================================================== --- clangd/index/SymbolLocation.h +++ clangd/index/SymbolLocation.h @@ -20,6 +20,13 @@ // Specify a position (Line, Column) of symbol. Using Line/Column allows us to // build LSP responses without reading the file content. // + // clangd uses the following definitions, which differ slightly from LSP: + // - Line is the number of newline characters (\n) before the point. + // - Column is (by default) the number of UTF-16 code units between the last \n + // (or start of file) and the point. + // If the `offsetEncoding` protocol extension is used to negotiate UTF-8, + // then it is instead the number of *bytes* since the last \n. + // // Position is encoded into 32 bits to save space. // If Line/Column overflow, the value will be their maximum value. struct Position { @@ -37,8 +44,7 @@ static constexpr uint32_t MaxColumn = (1 << 12) - 1; private: - uint32_t Line : 20; // 0-based - // Using UTF-16 code units. 
+ uint32_t Line : 20; // 0-based uint32_t Column : 12; // 0-based }; Index: clangd/tool/ClangdMain.cpp =================================================================== --- clangd/tool/ClangdMain.cpp +++ clangd/tool/ClangdMain.cpp @@ -9,10 +9,12 @@ #include "Features.inc" #include "ClangdLSPServer.h" #include "Path.h" +#include "Protocol.h" #include "Trace.h" #include "Transport.h" #include "index/Serialization.h" #include "clang/Basic/Version.h" +#include "llvm/ADT/Optional.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" @@ -219,6 +221,16 @@ "includes using index."), llvm::cl::init(true)); +static llvm::cl::opt ForceOffsetEncoding( + "offset-encoding", + llvm::cl::desc("Force the offsetEncoding used for character positions. " + "This bypasses negotiation via client capabilities."), + llvm::cl::values(clEnumValN(OffsetEncoding::UTF8, "utf-8", + "Offsets are in UTF-8 bytes"), + clEnumValN(OffsetEncoding::UTF16, "utf-16", + "Offsets are in UTF-16 code units")), + llvm::cl::init(OffsetEncoding::UnsupportedEncoding)); + namespace { /// \brief Supports a test URI scheme with relaxed constraints for lit tests. @@ -458,9 +470,13 @@ } Opts.ClangTidyOptProvider = ClangTidyOptProvider.get(); Opts.SuggestMissingIncludes = SuggestMissingIncludes; + llvm::Optional OffsetEncodingFromFlag; + if (ForceOffsetEncoding != OffsetEncoding::UnsupportedEncoding) + OffsetEncodingFromFlag = ForceOffsetEncoding; ClangdLSPServer LSPServer( *TransportLayer, FSProvider, CCOpts, CompileCommandsDirPath, - /*UseDirBasedCDB=*/CompileArgsFrom == FilesystemCompileArgs, Opts); + /*UseDirBasedCDB=*/CompileArgsFrom == FilesystemCompileArgs, + OffsetEncodingFromFlag, Opts); llvm::set_thread_name("clangd.main"); return LSPServer.run() ? 
0 : static_cast(ErrorResultCode::NoShutdownRequest); Index: test/clang-tidy/run-clang-tidy.cpp =================================================================== --- test/clang-tidy/run-clang-tidy.cpp +++ test/clang-tidy/run-clang-tidy.cpp @@ -1,3 +1,4 @@ +// RUN: %run_clang_tidy --help // RUN: rm -rf %t // RUN: mkdir %t // RUN: echo "[{\"directory\":\".\",\"command\":\"clang++ -c %/t/test.cpp\",\"file\":\"%/t/test.cpp\"}]" | sed -e 's/\\/\\\\/g' > %t/compile_commands.json Index: test/clangd/utf8.test =================================================================== --- /dev/null +++ test/clangd/utf8.test @@ -0,0 +1,32 @@ +# RUN: clangd -lit-test < %s | FileCheck -strict-whitespace %s +# This test verifies that we can negotiate UTF-8 offsets via protocol extension. +{"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"clangd","capabilities":{"offsetEncoding":["utf-8","utf-16"]},"trace":"off"}} +# CHECK: "offsetEncoding": "utf-8" +--- +{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"test:///main.cpp","languageId":"cpp","version":1,"text":"/*ΓΆ*/int x;\nint y=x;"}}} +--- +{"jsonrpc":"2.0","id":1,"method":"textDocument/definition","params":{"textDocument":{"uri":"test:///main.cpp"},"position":{"line":1,"character":6}}} +# /*ΓΆ*/int x; +# 01234567890 +# x is character (and utf-16) range [9,10) but byte range [10,11). 
+# CHECK: "id": 1, +# CHECK-NEXT: "jsonrpc": "2.0", +# CHECK-NEXT: "result": [ +# CHECK-NEXT: { +# CHECK-NEXT: "range": { +# CHECK-NEXT: "end": { +# CHECK-NEXT: "character": 11, +# CHECK-NEXT: "line": 0 +# CHECK-NEXT: }, +# CHECK-NEXT: "start": { +# CHECK-NEXT: "character": 10, +# CHECK-NEXT: "line": 0 +# CHECK-NEXT: } +# CHECK-NEXT: }, +# CHECK-NEXT: "uri": "file://{{.*}}/main.cpp" +# CHECK-NEXT: } +# CHECK-NEXT: ] +--- +{"jsonrpc":"2.0","id":10000,"method":"shutdown"} +--- +{"jsonrpc":"2.0","method":"exit"} Index: unittests/clangd/IndexActionTests.cpp =================================================================== --- unittests/clangd/IndexActionTests.cpp +++ unittests/clangd/IndexActionTests.cpp @@ -29,6 +29,8 @@ MATCHER_P(HasDigest, Digest, "") { return arg.Digest == Digest; } +MATCHER_P(HasName, Name, "") { return arg.Name == Name; } + MATCHER(HasSameURI, "") { llvm::StringRef URI = testing::get<0>(arg); const std::string &Path = testing::get<1>(arg); @@ -43,6 +45,7 @@ void checkNodesAreInitialized(const IndexFileIn &IndexFile, const std::vector &Paths) { + ASSERT_TRUE(IndexFile.Sources); EXPECT_THAT(Paths.size(), IndexFile.Sources->size()); for (llvm::StringRef Path : Paths) { auto URI = toUri(Path); @@ -224,6 +227,27 @@ HasDigest(digest(HeaderCode)))))); } +TEST_F(IndexActionTest, NoWarnings) { + std::string MainFilePath = testPath("main.cpp"); + std::string MainCode = R"cpp( + void foo(int x) { + if (x = 1) // -Wparentheses + return; + if (x = 1) // -Wparentheses + return; + } + void bar() {} + )cpp"; + addFile(MainFilePath, MainCode); + // We set -ferror-limit so the warning-promoted-to-error would be fatal. + // This would cause indexing to stop (if warnings weren't disabled). 
+ IndexFileIn IndexFile = runIndexingAction( + MainFilePath, {"-ferror-limit=1", "-Wparentheses", "-Werror"}); + ASSERT_TRUE(IndexFile.Sources); + ASSERT_NE(0u, IndexFile.Sources->size()); + EXPECT_THAT(*IndexFile.Symbols, ElementsAre(HasName("foo"), HasName("bar"))); +} + } // namespace } // namespace clangd } // namespace clang Index: unittests/clangd/SourceCodeTests.cpp =================================================================== --- unittests/clangd/SourceCodeTests.cpp +++ unittests/clangd/SourceCodeTests.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// #include "Annotations.h" +#include "Context.h" +#include "Protocol.h" #include "SourceCode.h" #include "llvm/Support/Error.h" #include "llvm/Support/raw_os_ostream.h" @@ -21,14 +23,9 @@ using llvm::HasValue; MATCHER_P2(Pos, Line, Col, "") { - return arg.line == Line && arg.character == Col; + return arg.line == int(Line) && arg.character == int(Col); } -// The = β†’ πŸ‘† below are ASCII (1 byte), BMP (3 bytes), and astral (4 bytes). -const char File[] = R"(0:0 = 0 -1:0 β†’ 8 -2:0 πŸ‘† 18)"; - /// A helper to make tests easier to read. Position position(int line, int character) { Position Pos; @@ -52,8 +49,37 @@ EXPECT_EQ(lspLength("Β₯"), 1UL); // astral EXPECT_EQ(lspLength("πŸ˜‚"), 2UL); + + WithContextValue UTF8(kCurrentOffsetEncoding, OffsetEncoding::UTF8); + EXPECT_EQ(lspLength(""), 0UL); + EXPECT_EQ(lspLength("ascii"), 5UL); + // BMP + EXPECT_EQ(lspLength("↓"), 3UL); + EXPECT_EQ(lspLength("Β₯"), 2UL); + // astral + EXPECT_EQ(lspLength("πŸ˜‚"), 4UL); + + WithContextValue UTF32(kCurrentOffsetEncoding, OffsetEncoding::UTF32); + EXPECT_EQ(lspLength(""), 0UL); + EXPECT_EQ(lspLength("ascii"), 5UL); + // BMP + EXPECT_EQ(lspLength("↓"), 1UL); + EXPECT_EQ(lspLength("Β₯"), 1UL); + // astral + EXPECT_EQ(lspLength("πŸ˜‚"), 1UL); } +// The = β†’ πŸ‘† below are ASCII (1 byte), BMP (3 bytes), and astral (4 bytes). 
+const char File[] = R"(0:0 = 0 +1:0 β†’ 8 +2:0 πŸ‘† 18)"; +struct Line { + unsigned Number; + unsigned Offset; + unsigned Length; +}; +Line FileLines[] = {Line{0, 0, 7}, Line{1, 8, 9}, Line{2, 18, 11}}; + TEST(SourceCodeTests, PositionToOffset) { // line out of bounds EXPECT_THAT_EXPECTED(positionToOffset(File, position(-1, 2)), llvm::Failed()); @@ -113,6 +139,80 @@ // line out of bounds EXPECT_THAT_EXPECTED(positionToOffset(File, position(3, 0)), llvm::Failed()); EXPECT_THAT_EXPECTED(positionToOffset(File, position(3, 1)), llvm::Failed()); + + // Codepoints are similar, except near astral characters. + WithContextValue UTF32(kCurrentOffsetEncoding, OffsetEncoding::UTF32); + // line out of bounds + EXPECT_THAT_EXPECTED(positionToOffset(File, position(-1, 2)), llvm::Failed()); + // first line + EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, -1)), + llvm::Failed()); // out of range + EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 0)), + llvm::HasValue(0)); // first character + EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 3)), + llvm::HasValue(3)); // middle character + EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 6)), + llvm::HasValue(6)); // last character + EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 7)), + llvm::HasValue(7)); // the newline itself + EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 7), false), + llvm::HasValue(7)); + EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 8)), + llvm::HasValue(7)); // out of range + EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 8), false), + llvm::Failed()); // out of range + // middle line + EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, -1)), + llvm::Failed()); // out of range + EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 0)), + llvm::HasValue(8)); // first character + EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 3)), + llvm::HasValue(11)); // middle character + EXPECT_THAT_EXPECTED(positionToOffset(File, 
position(1, 3), false), + llvm::HasValue(11)); + EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 6)), + llvm::HasValue(16)); // last character + EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 7)), + llvm::HasValue(17)); // the newline itself + EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 8)), + llvm::HasValue(17)); // out of range + EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 8), false), + llvm::Failed()); // out of range + // last line + EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, -1)), + llvm::Failed()); // out of range + EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 0)), + llvm::HasValue(18)); // first character + EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 4)), + llvm::HasValue(22)); // Before astral character. + EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 5), false), + llvm::HasValue(26)); // after astral character + EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 7)), + llvm::HasValue(28)); // last character + EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 8)), + llvm::HasValue(29)); // EOF + EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 9), false), + llvm::Failed()); // out of range + // line out of bounds + EXPECT_THAT_EXPECTED(positionToOffset(File, position(3, 0)), llvm::Failed()); + EXPECT_THAT_EXPECTED(positionToOffset(File, position(3, 1)), llvm::Failed()); + + // Test UTF-8, where transformations are trivial. 
+ WithContextValue UTF8(kCurrentOffsetEncoding, OffsetEncoding::UTF8); + EXPECT_THAT_EXPECTED(positionToOffset(File, position(-1, 2)), llvm::Failed()); + EXPECT_THAT_EXPECTED(positionToOffset(File, position(3, 0)), llvm::Failed()); + for (Line L : FileLines) { + EXPECT_THAT_EXPECTED(positionToOffset(File, position(L.Number, -1)), + llvm::Failed()); // out of range + for (unsigned I = 0; I <= L.Length; ++I) + EXPECT_THAT_EXPECTED(positionToOffset(File, position(L.Number, I)), + llvm::HasValue(L.Offset + I)); + EXPECT_THAT_EXPECTED(positionToOffset(File, position(L.Number, L.Length+1)), + llvm::HasValue(L.Offset + L.Length)); + EXPECT_THAT_EXPECTED( + positionToOffset(File, position(L.Number, L.Length + 1), false), + llvm::Failed()); // out of range + } } TEST(SourceCodeTests, OffsetToPosition) { @@ -134,6 +234,34 @@ EXPECT_THAT(offsetToPosition(File, 28), Pos(2, 8)) << "end of last line"; EXPECT_THAT(offsetToPosition(File, 29), Pos(2, 9)) << "EOF"; EXPECT_THAT(offsetToPosition(File, 30), Pos(2, 9)) << "out of bounds"; + + // Codepoints are similar, except near astral characters. 
+ WithContextValue UTF32(kCurrentOffsetEncoding, OffsetEncoding::UTF32); + EXPECT_THAT(offsetToPosition(File, 0), Pos(0, 0)) << "start of file"; + EXPECT_THAT(offsetToPosition(File, 3), Pos(0, 3)) << "in first line"; + EXPECT_THAT(offsetToPosition(File, 6), Pos(0, 6)) << "end of first line"; + EXPECT_THAT(offsetToPosition(File, 7), Pos(0, 7)) << "first newline"; + EXPECT_THAT(offsetToPosition(File, 8), Pos(1, 0)) << "start of second line"; + EXPECT_THAT(offsetToPosition(File, 12), Pos(1, 4)) << "before BMP char"; + EXPECT_THAT(offsetToPosition(File, 13), Pos(1, 5)) << "in BMP char"; + EXPECT_THAT(offsetToPosition(File, 15), Pos(1, 5)) << "after BMP char"; + EXPECT_THAT(offsetToPosition(File, 16), Pos(1, 6)) << "end of second line"; + EXPECT_THAT(offsetToPosition(File, 17), Pos(1, 7)) << "second newline"; + EXPECT_THAT(offsetToPosition(File, 18), Pos(2, 0)) << "start of last line"; + EXPECT_THAT(offsetToPosition(File, 21), Pos(2, 3)) << "in last line"; + EXPECT_THAT(offsetToPosition(File, 22), Pos(2, 4)) << "before astral char"; + EXPECT_THAT(offsetToPosition(File, 24), Pos(2, 5)) << "in astral char"; + EXPECT_THAT(offsetToPosition(File, 26), Pos(2, 5)) << "after astral char"; + EXPECT_THAT(offsetToPosition(File, 28), Pos(2, 7)) << "end of last line"; + EXPECT_THAT(offsetToPosition(File, 29), Pos(2, 8)) << "EOF"; + EXPECT_THAT(offsetToPosition(File, 30), Pos(2, 8)) << "out of bounds"; + + WithContextValue UTF8(kCurrentOffsetEncoding, OffsetEncoding::UTF8); + for (Line L : FileLines) { + for (unsigned I = 0; I <= L.Length; ++I) + EXPECT_THAT(offsetToPosition(File, L.Offset + I), Pos(L.Number, I)); + } + EXPECT_THAT(offsetToPosition(File, 30), Pos(2, 11)) << "out of bounds"; } TEST(SourceCodeTests, IsRangeConsecutive) {