Index: clangd/CMakeLists.txt =================================================================== --- clangd/CMakeLists.txt +++ clangd/CMakeLists.txt @@ -35,6 +35,7 @@ ExpectedTypes.cpp FindSymbols.cpp FileDistance.cpp + Format.cpp FS.cpp FSProvider.cpp FuzzyMatch.cpp Index: clangd/ClangdLSPServer.cpp =================================================================== --- clangd/ClangdLSPServer.cpp +++ clangd/ClangdLSPServer.cpp @@ -364,7 +364,7 @@ {"documentRangeFormattingProvider", true}, {"documentOnTypeFormattingProvider", llvm::json::Object{ - {"firstTriggerCharacter", "}"}, + {"firstTriggerCharacter", "\n"}, {"moreTriggerCharacter", {}}, }}, {"codeActionProvider", true}, @@ -581,7 +581,8 @@ "onDocumentOnTypeFormatting called for non-added file", ErrorCode::InvalidParams)); - auto ReplacementsOrError = Server->formatOnType(*Code, File, Params.position); + auto ReplacementsOrError = + Server->formatOnType(*Code, File, Params.position, Params.ch); if (ReplacementsOrError) Reply(replacementsToEdits(*Code, ReplacementsOrError.get())); else Index: clangd/ClangdServer.h =================================================================== --- clangd/ClangdServer.h +++ clangd/ClangdServer.h @@ -207,10 +207,11 @@ llvm::Expected formatFile(StringRef Code, PathRef File); - /// Run formatting after a character was typed at \p Pos in \p File with + /// Run formatting after \p TriggerText was typed at \p Pos in \p File with /// content \p Code. - llvm::Expected - formatOnType(StringRef Code, PathRef File, Position Pos); + llvm::Expected formatOnType(StringRef Code, + PathRef File, Position Pos, + StringRef TriggerText); /// Rename all occurrences of the symbol at the \p Pos in \p File to /// \p NewName. Index: clangd/ClangdServer.cpp =================================================================== --- clangd/ClangdServer.cpp +++ clangd/ClangdServer.cpp @@ -10,6 +10,7 @@ #include "ClangdUnit.h" #include "CodeComplete.h" #include "FindSymbols.h" +#include "Format.h" #include "Headers.h" #include "Protocol.h" #include "SourceCode.h" @@ -277,19 +278,18 @@ } llvm::Expected -ClangdServer::formatOnType(llvm::StringRef Code, PathRef File, Position Pos) { - // Look for the previous opening brace from the character position and - // format starting from there. +ClangdServer::formatOnType(llvm::StringRef Code, PathRef File, Position Pos, + StringRef TriggerText) { llvm::Expected CursorPos = positionToOffset(Code, Pos); if (!CursorPos) return CursorPos.takeError(); - size_t PreviousLBracePos = - llvm::StringRef(Code).find_last_of('{', *CursorPos); - if (PreviousLBracePos == llvm::StringRef::npos) - PreviousLBracePos = *CursorPos; - size_t Len = *CursorPos - PreviousLBracePos; + auto FS = FSProvider.getFileSystem(); + auto Style = format::getStyle(format::DefaultFormatStyle, File, + format::DefaultFallbackStyle, Code, FS.get()); + if (!Style) + return Style.takeError(); - return formatCode(Code, File, {tooling::Range(PreviousLBracePos, Len)}); + return formatIncremental(Code, *CursorPos, TriggerText, *Style); } void ClangdServer::rename(PathRef File, Position Pos, llvm::StringRef NewName, Index: clangd/Format.h =================================================================== --- /dev/null +++ clangd/Format.h @@ -0,0 +1,45 @@ +//===--- Format.h - automatic code formatting ---------------*- C++-*------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Clangd uses clang-format for formatting operations. +// This file adapts it to support new scenarios like format-on-type. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FORMAT_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FORMAT_H + +#include "Protocol.h" +#include "clang/Format/Format.h" +#include "llvm/ADT/StringRef.h" + +namespace clang { +namespace clangd { + +/// Applies limited formatting around new \p InsertedText. +/// The \p Code already contains the updated text near \p Cursor, and may have +/// had additional / characters (such as indentation) inserted by the editor. +/// +/// Example breaking a line (^ is the cursor): +/// === before newline is typed === +/// if(1){^} +/// === after newline is typed and editor indents === +/// if(1){ +/// ^} +/// === after formatIncremental(InsertedText="\n") === +/// if (1) { +/// ^ +/// } +tooling::Replacements formatIncremental(llvm::StringRef Code, unsigned Cursor, + llvm::StringRef InsertedText, + format::FormatStyle Style); + +} // namespace clangd +} // namespace clang + +#endif + Index: clangd/Format.cpp =================================================================== --- /dev/null +++ clangd/Format.cpp @@ -0,0 +1,226 @@ +//===--- Format.cpp -----------------------------------------*- C++-*------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "Format.h" +#include "Logger.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Lex/Lexer.h" +#include "llvm/Support/Unicode.h" + +namespace clang { +namespace clangd { +namespace { + +/// Append closing brackets )]} to \p Code to make it well-formed. +/// Clang-format conservatively refuses to format files with unmatched brackets +/// as it isn't sure where the errors are and so can't correct. +/// When editing, it's reasonable to assume code before the cursor is complete. +void closeBrackets(std::string &Code, const format::FormatStyle &Style) { + SourceManagerForFile FileSM("dummy.cpp", Code); + auto &SM = FileSM.get(); + FileID FID = SM.getMainFileID(); + Lexer Lex(FID, SM.getBuffer(FID), SM, format::getFormattingLangOpts(Style)); + Token Tok; + std::vector Brackets; + while (!Lex.LexFromRawLexer(Tok)) { + switch(Tok.getKind()) { + case tok::l_paren: + Brackets.push_back(')'); + break; + case tok::l_brace: + Brackets.push_back('}'); + break; + case tok::l_square: + Brackets.push_back(']'); + break; + case tok::r_paren: + if (!Brackets.empty() && Brackets.back() == ')') + Brackets.pop_back(); + break; + case tok::r_brace: + if (!Brackets.empty() && Brackets.back() == '}') + Brackets.pop_back(); + break; + case tok::r_square: + if (!Brackets.empty() && Brackets.back() == ']') + Brackets.pop_back(); + break; + default: + continue; + } + } + // Attempt to end any open comments first. + Code.append("\n// */\n"); + Code.append(Brackets.rbegin(), Brackets.rend()); +} + +static StringRef commentMarker(llvm::StringRef Line) { + for (StringRef Marker : {"///", "//"}){ + auto I = Line.rfind(Marker); + if (I != StringRef::npos) + return Line.substr(I, Marker.size()); + } + return ""; +} + +llvm::StringRef firstLine(llvm::StringRef Code) { + return Code.take_until([](char C) { return C == '\n'; }); +} + +llvm::StringRef lastLine(llvm::StringRef Code) { + llvm::StringRef Rest = Code; + while (!Rest.empty() && Rest.back() != '\n') + Rest = Rest.drop_back(); + return Code.substr(Rest.size()); +} + +// tooling::Replacement from overlapping StringRefs: From must be part of Code. +tooling::Replacement replacement(llvm::StringRef Code, llvm::StringRef From, + llvm::StringRef To) { + assert(From.begin() >= From.begin() && From.end() <= From.end()); + // The filename is required but ignored. + return tooling::Replacement("format-scratch", From.data() - Code.data(), + From.size(), To); +}; + +// High-level representation of incremental formatting changes. +// The changes are made in two steps. +// 1) a (possibly-empty) set of changes synthesized by clangd (e.g. adding +// comment markers when splitting a line comment with a newline). +// 2) a selective clang-format run: +// - the "source code" passed to clang format is the code up to the cursor, +// a placeholder for the cursor, and some closing brackets +// - the formatting is restricted to the cursor and (possibly) other ranges +// (e.g. the old line when inserting a newline). +// - changes before the cursor are applied, those after are discarded. +struct IncrementalChanges { + // Changes that should be applied before running clang-format. + tooling::Replacements Changes; + // Ranges of the original source code that should be clang-formatted. + // The CursorProxyText will also be formatted. + std::vector FormatRanges; + // The source code that should stand in for the cursor when clang-formatting. + // e.g. after inserting a newline, a line-comment at the cursor is used to + // ensure that the newline is preserved. + std::string CursorPlaceholder; +}; + +// After a newline: +// - we continue any line-comment that was split +// - we format the old line in addition to the cursor +// - we represent the cursor with a line comment to preserve the newline +IncrementalChanges getIncrementalChangesAfterNewline(llvm::StringRef Code, + unsigned Cursor) { + IncrementalChanges Result; + // Before newline, code looked like: + // leading^trailing + // After newline, code looks like: + // leading + // indentation^trailing + // Where indentation was added by the editor. + StringRef Trailing = firstLine(Code.substr(Cursor)); + StringRef Indentation = lastLine(Code.take_front(Cursor)); + if (Indentation.data() == Code.data()) { + vlog("Typed a newline, but we're still on the first line!"); + return Result; + } + StringRef Leading = + lastLine(Code.take_front(Indentation.data() - Code.data() - 1)); + + // Strip leading whitespace on trailing line. + if (unsigned TrailWS = Trailing.size() - Trailing.ltrim().size()) + cantFail(Result.Changes.add( + replacement(Code, StringRef(Trailing.begin(), TrailWS), ""))); + + // If we split a comment, replace indentation with a comment marker. + StringRef CommentMarker = commentMarker(Leading); + if (!CommentMarker.empty() && !Trailing.trim().empty()) { + using llvm::sys::unicode::columnWidthUTF8; + // We indent the new comment to match the previous one. + StringRef PreComment = + Leading.take_front(CommentMarker.data() - Leading.data()); + std::string IndentAndComment = + (std::string(columnWidthUTF8(PreComment), ' ') + CommentMarker + " ") + .str(); + cantFail( + Result.Changes.add(replacement(Code, Indentation, IndentAndComment))); + } + + // Format the whole leading line. + Result.FormatRanges.push_back( + tooling::Range(Leading.data() - Code.data(), Leading.size())); + + // The identifier improves clang-format's parsing of e.g. for without braces. + Result.CursorPlaceholder = "//======\nident"; + + return Result; +} + +IncrementalChanges getIncrementalChanges(llvm::StringRef Code, unsigned Cursor, + llvm::StringRef InsertedText) { + IncrementalChanges Result; + if (InsertedText == "\n") + return getIncrementalChangesAfterNewline(Code, Cursor); + + Result.CursorPlaceholder = " /**/"; + return Result; +} + +} // namespace + +tooling::Replacements formatIncremental(llvm::StringRef OriginalCode, + unsigned Cursor, + llvm::StringRef InsertedText, + format::FormatStyle Style) { + IncrementalChanges Incremental = + getIncrementalChanges(OriginalCode, Cursor, InsertedText); + + // Update cursor, ranges, code after pre-formatting changes. + std::string Code = cantFail( + tooling::applyAllReplacements(OriginalCode, Incremental.Changes)); + Cursor = Incremental.Changes.getShiftedCodePosition(Cursor); + Incremental.FormatRanges = tooling::calculateRangesAfterReplacements( + Incremental.Changes, Incremental.FormatRanges); + + // Mutate code to prepare for clang-format run. + Code.resize(Cursor); + Code.append(Incremental.CursorPlaceholder); + closeBrackets(Code, Style); + + // Sanity check: we're not requesting formatting of any code we just erased. + for (const tooling::Range &R : Incremental.FormatRanges) { + assert(R.getOffset() + R.getLength() <= Cursor); + (void)R; + } + // As well as any ranges specified by IncrementalChanges, format the cursor. + Incremental.FormatRanges.push_back( + tooling::Range(Cursor, Incremental.CursorPlaceholder.size())); + dlog("Incrementally formatting at {0}:\n{1}", Cursor, Code); + + // Apply the formatting, but only accept changes up to the cursor. + tooling::Replacements FormattingChanges; + for (const tooling::Replacement &R : + format::reformat(Style, Code, Incremental.FormatRanges)) { + if (R.getOffset() + R.getLength() <= Cursor) + cantFail(FormattingChanges.add(R)); + else if(R.getOffset() < Cursor) { + // Eek, an edit that overlaps the cursor! + if (R.getReplacementText().empty()) + // Deletions are most likely and easy to handle. + cantFail(FormattingChanges.add(replacement( + Code, StringRef(Code).slice(R.getOffset(), Cursor), ""))); + else + // Hopefully won't happen in practice? + elog("Incremental clang-format edit overlapping cursor @ {0}!\n{1}", + Cursor, Code); + } + } + + return Incremental.Changes.merge(FormattingChanges); +} +} +} // namespace clang Index: unittests/clangd/CMakeLists.txt =================================================================== --- unittests/clangd/CMakeLists.txt +++ unittests/clangd/CMakeLists.txt @@ -27,6 +27,7 @@ FileDistanceTests.cpp FileIndexTests.cpp FindSymbolsTests.cpp + FormatTests.cpp FSTests.cpp FunctionTests.cpp FuzzyMatchTests.cpp Index: unittests/clangd/FormatTests.cpp =================================================================== --- /dev/null +++ unittests/clangd/FormatTests.cpp @@ -0,0 +1,155 @@ +//===-- FormatTests.cpp - Automatic code formatting tests -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Format.h" +#include "Annotations.h" +#include "SourceCode.h" +#include "TestFS.h" +#include "clang/Format/Format.h" +#include "clang/Tooling/Core/Replacement.h" +#include "llvm/Support/Error.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +namespace clang { +namespace clangd { +namespace { + +std::string afterTyped(llvm::StringRef CodeWithCursor, + llvm::StringRef Typed) { + Annotations Code(CodeWithCursor); + auto Cursor = llvm::cantFail(positionToOffset(Code.code(), Code.point())); + auto Changes = + formatIncremental(Code.code(), Cursor, Typed, + format::getGoogleStyle(format::FormatStyle::LK_Cpp)); + std::string NewCode = + cantFail(tooling::applyAllReplacements(Code.code(), Changes)); + NewCode.insert(Changes.getShiftedCodePosition(Cursor), "^"); + return NewCode; +} + +// We can't pass raw strings directly to EXPECT_EQ because of gcc bugs. +void expectAfterNewline(const char *Before, const char *After) { + EXPECT_EQ(After, afterTyped(Before, "\n")) << Before; +} + +TEST(FormatIncremental, SplitComment) { + expectAfterNewline(R"cpp( +// this comment was +^split +)cpp", + R"cpp( +// this comment was +// ^split +)cpp"); + + expectAfterNewline(R"cpp( +// trailing whitespace is not a split +^ +)cpp", + R"cpp( +// trailing whitespace is not a split +^ +)cpp"); + + + expectAfterNewline(R"cpp( +// extra + ^ whitespace +)cpp", + R"cpp( +// extra +// ^whitespace +)cpp"); + + expectAfterNewline(R"cpp( +/// triple +^slash +)cpp", + R"cpp( +/// triple +/// ^slash +)cpp"); + + expectAfterNewline(R"cpp( +int x; // aligned +^comment +)cpp", + R"cpp( +int x; // aligned + // ^comment +)cpp"); +} + +TEST(FormatIncremental, Indentation) { + expectAfterNewline(R"cpp( +void foo() { + if (bar) +^ +)cpp", + R"cpp( +void foo() { + if (bar) + ^ +)cpp"); + + expectAfterNewline(R"cpp( +void foo() { + bar(baz( +^ +)cpp", + R"cpp( +void foo() { + bar(baz( + ^ +)cpp"); + + expectAfterNewline(R"cpp( +class X { +protected: +^ +)cpp", + R"cpp( +class X { + protected: + ^ +)cpp"); +} + +TEST(FormatIncremental, FormatPreviousLine) { + expectAfterNewline(R"cpp( +void foo() { + untouched( ); +int x=2; +^ +)cpp", + R"cpp( +void foo() { + untouched( ); + int x = 2; + ^ +)cpp"); + + expectAfterNewline(R"cpp( +int x=untouched( ); +auto L = []{return;return;}; +^ +)cpp", + R"cpp( +int x=untouched( ); +auto L = [] { + return; + return; +}; +^ +)cpp"); +} + +} // namespace +} // namespace clangd +} // namespace clang